iOS 多线程（GCD源码分析）这个其实是个函数，针对编译器优化的一个函数，后面几个宏是对这个函数的封装，所以提前拎出来

GCD源码中常见的宏

__builtin_expect

这个其实是个函数，针对编译器优化的一个函数，后面几个宏是对这个函数的封装，所以提前拎出来说一下。写代码中我们经常会遇到条件判断语句

if(今天是工作日) {
    printf("好好上班");
}else{
    printf("好好睡觉");
}

CPU读取指令的时候并非一条一条的来读，而是多条一起加载进来，比如已经加载了if(今天是工作日) printf(“好好上班”);的指令，这时候条件式如果为非，也就是非工作日，那么CPU继续把printf(“好好睡觉”);这条指令加载进来，这样就造成了性能浪费的现象。
__builtin_expect的第一个参数是实际值，第二个参数是预测值。使用这个目的是告诉编译器if条件式是不是有更大的可能被满足。

likely&unlikely

解开这个宏后其实是对__builtin_expect封装，likely表示更大可能成立，unlikely表示更大可能不成立。

#define likely(x) __builtin_expect(!!(x), 1)
#define unlikely(x) __builtin_expect(!!(x), 0)

遇到这样的，if(likely(a == 0))理解成if(a==0)即可，unlikely也是同样的。

fastpath&slowpath

跟上面也是差不多的，fastpath表示更大可能成立，slowpath表示更大可能不成立

#define fastpath(x) ((typeof(x))__builtin_expect(_safe_cast_to_long(x), ~0l))
#define slowpath(x) ((typeof(x))__builtin_expect(_safe_cast_to_long(x), 0l))

这两个理解起来跟likely和unlikely一样，只需要关注里面的条件式是否满足即可。

os_atomic_cmpxchg

其内部就是atomic_compare_exchange_strong_explicit函数，这个函数的作用是：第二个参数与第一个参数值比较，如果相等，第三个参数的值替换第一个参数的值。如果不相等，把第一个参数的值赋值到第二个参数上。

#define os_atomic_cmpxchg(p, e, v, m) \
        ({ _os_atomic_basetypeof(p) _r = (e); \
        atomic_compare_exchange_strong_explicit(_os_atomic_c11_atomic(p), \
        &_r, v, memory_order_##m, memory_order_relaxed); })

os_atomic_store2o

// 将第二个参数，保存到第一个参数

#define os_atomic_store2o(p, f, v, m)  os_atomic_store(&(p)->f, (v), m)
#define os_atomic_store(p, v, m) \
      atomic_store_explicit(_os_atomic_c11_atomic(p), v, memory_order_##m)

os_atomic_inc_orig

// 将1保存到第一个参数中
#define os_atomic_inc_orig(p, m)  os_atomic_add_orig((p), 1, m)
#define os_atomic_add_orig(p, v, m) _os_atomic_c11_op_orig((p), (v), m, add, +)
#define _os_atomic_c11_op_orig(p, v, m, o, op) \
        atomic_fetch_##o##_explicit(_os_atomic_c11_atomic(p), v, \
        memory_order_##m)

GCD源码中常见的结构体

dispatch_queue_t

// dispatch_queue_t的定义
typedef struct dispatch_queue_s *dispatch_queue_t;
// 继续查看dispatch_queue_s结构体
struct dispatch_queue_s {
	DISPATCH_QUEUE_CLASS_HEADER(queue, void *__dq_opaque1);
	/* 32bit hole on LP64 */
} DISPATCH_ATOMIC64_ALIGN;
// 拆解DISPATCH_QUEUE_CLASS_HEADER这个宏
#if OS_OBJECT_HAVE_OBJC1
#define _DISPATCH_QUEUE_CLASS_HEADER(x, __pointer_sized_field__) \
	DISPATCH_OBJECT_HEADER(x); \
	DISPATCH_UNION_LE(uint64_t volatile dq_state, \
			dispatch_lock dq_state_lock, \
			uint32_t dq_state_bits \
	); \
	__pointer_sized_field__
#else
#define _DISPATCH_QUEUE_CLASS_HEADER(x, __pointer_sized_field__) \
	DISPATCH_OBJECT_HEADER(x); \
	__pointer_sized_field__; \
	DISPATCH_UNION_LE(uint64_t volatile dq_state, \
			dispatch_lock dq_state_lock, \
			uint32_t dq_state_bits \
	)
#endif

#define DISPATCH_QUEUE_CLASS_HEADER(x, __pointer_sized_field__) \
	_DISPATCH_QUEUE_CLASS_HEADER(x, __pointer_sized_field__); \
	/* LP64 global queue cacheline boundary */ \
	unsigned long dq_serialnum; \
	const char *dq_label; \
	DISPATCH_UNION_LE(uint32_t volatile dq_atomic_flags, \
		const uint16_t dq_width, \
		const uint16_t __dq_opaque2 \
	); \
	dispatch_priority_t dq_priority; \
	union { \
		struct dispatch_queue_specific_head_s *dq_specific_head; \
		struct dispatch_source_refs_s *ds_refs; \
		struct dispatch_timer_source_refs_s *ds_timer_refs; \
		struct dispatch_mach_recv_refs_s *dm_recv_refs; \
	}; \
	int volatile dq_sref_cnt
	
// 还有一层宏DISPATCH_OBJECT_HEADER
#define DISPATCH_OBJECT_HEADER(x) \
	struct dispatch_object_s _as_do[0]; \
	_DISPATCH_OBJECT_HEADER(x)

先说明下这个##的作用就拼接成字符串，比如x为group的话，下面就会拼接为dispatch_group

#define _DISPATCH_OBJECT_HEADER(x) \
	struct _os_object_s _as_os_obj[0]; \
	OS_OBJECT_STRUCT_HEADER(dispatch_##x); \
	struct dispatch_##x##_s *volatile do_next; \
	struct dispatch_queue_s *do_targetq; \
	void *do_ctxt; \
	void *do_finalizer

来到OS_OBJECT_STRUCT_HEADER之后，我们需要注意一个成员变量，记住这个成员变量名字叫做do_vtable。再继续拆解_OS_OBJECT_HEADER发现里面起就是一个isa指针和引用计数一些信息。

#define OS_OBJECT_STRUCT_HEADER(x) \
	_OS_OBJECT_HEADER(\
	const void *_objc_isa, \
	do_ref_cnt, \
	do_xref_cnt); \
	const struct x##_vtable_s *do_vtable

#define _OS_OBJECT_HEADER(isa, ref_cnt, xref_cnt) \
        isa; /* must be pointer-sized */ \
        int volatile ref_cnt; \
        int volatile xref_cnt

我们用对应的结构替换掉定义的宏，如下：

struct dispatch_queue_s {
    //第一部分：DISPATCH_STRUCT_HEADER(dispatch_queue_s, dispatch_queue_vtable_s)
    const struct dispatch_queue_vtable_s *do_vtable;  //dispatch_queue_s的操作函数:dispatch_queue_vtable_s类型的结构体
    struct dispatch_queue_s *volatile do_next;  //链表的next
    unsigned int do_ref_cnt;    //引用计数
    unsigned int do_xref_cnt;  //外部引用计数
    unsigned int do_suspend_cnt;  //暂停标志，比如延时处理中，在任务到时后，计时器处理将会将该标志位修改，然后唤醒队列调度
    struct dispatch_queue_s *do_targetq;  //目标队列，GCD允许我们将一个队列放在另一个队列里执行任务
    void *do_ctxt;  //上下文，用来存储线程池相关数据，比如用于线程挂起和唤醒的信号量、线程池尺寸等
    void *do_finalizer;
    
    //第二部分：DISPATCH_QUEUE_HEADER
    uint32_t volatile dq_running;  //是否运行中
    uint32_t dq_width;  //最大并发数：主线程/串行中这个值为1
    struct dispatch_object_s *volatile dq_items_tail;  //链表尾节点
    struct dispatch_object_s *volatile dq_items_head;  //链表头节点
    unsigned long dq_serialnum;  //队列的序列号
    dispatch_queue_t dq_specific_q; //specific队列
    
    //其他：
    char dq_label[DISPATCH_QUEUE_MIN_LABEL_SIZE]; // must be last 说明队列的名字要少于64个字符
    char _dq_pad[DISPATCH_QUEUE_CACHELINE_PAD]; // for static queues only
};

dispatch_continuation_t

这个结构体就是用来封装block对象的，保存block的上下文环境和block执行函数等。

typedef struct dispatch_continuation_s {
	DISPATCH_CONTINUATION_HEADER(continuation);
} *dispatch_continuation_t;

查看DISPATCH_CONTINUATION_HEADER这个宏

#define DISPATCH_CONTINUATION_HEADER(x) \
    union { \
        const void *do_vtable; \
        uintptr_t dc_flags; \
    }; \
    union { \
        pthread_priority_t dc_priority; \
        int dc_cache_cnt; \
        uintptr_t dc_pad; \
    }; \
    struct dispatch_##x##_s *volatile do_next; \
    struct voucher_s *dc_voucher; \
    dispatch_function_t dc_func; \
    void *dc_ctxt; \
    void *dc_data; \
    void *dc_other

dispatch_object_t

typedef union {
	struct _os_object_s *_os_obj;
	struct dispatch_object_s *_do;
	struct dispatch_queue_s *_dq;
	struct dispatch_queue_attr_s *_dqa;
	struct dispatch_group_s *_dg;
	struct dispatch_source_s *_ds;
	struct dispatch_mach_s *_dm;
	struct dispatch_mach_msg_s *_dmsg;
	struct dispatch_semaphore_s *_dsema;
	struct dispatch_data_s *_ddata;
	struct dispatch_io_s *_dchannel;
} dispatch_object_t DISPATCH_TRANSPARENT_UNION;

dispatch_function_t

// dispatch_function_t只是一个函数指针
typedef void (*dispatch_function_t)(void *_Nullable);

GCD源码分析

创建队列

首先我们先从创建队列讲起

// 首先来看下创建队列传入的宏 DISPATCH_QUEUE_SERIAL & DISPATCH_QUEUE_CONCURRENT
#define DISPATCH_QUEUE_SERIAL NULL
#define DISPATCH_QUEUE_CONCURRENT \
		DISPATCH_GLOBAL_OBJECT(dispatch_queue_attr_t, \
		_dispatch_queue_attr_concurrent)

// 串行队列
dispatch_queue_t queueSerial = dispatch_queue_create("com.noah.serial", DISPATCH_QUEUE_SERIAL);
// dispatch_queue_t queueSerial = dispatch_queue_create("com.noah.serial", NULL); // 与上面的代码等价
// 并发队列
dispatch_queue_t queueConcurrent = dispatch_queue_create("com.noah.concurrent", DISPATCH_QUEUE_CONCURRENT);

创建队列的方法是调用dispatch_queue_create函数

#define DISPATCH_TARGET_QUEUE_DEFAULT NULL
// dispatch_queue_create函数
dispatch_queue_t
dispatch_queue_create(const char *label, dispatch_queue_attr_t attr)
{
	return _dispatch_lane_create_with_target(label, attr,
			DISPATCH_TARGET_QUEUE_DEFAULT, true);
}

跳转到_dispatch_lane_create_with_target函数

DISPATCH_NOINLINE
static dispatch_queue_t
_dispatch_lane_create_with_target(const char *label, dispatch_queue_attr_t dqa,
		dispatch_queue_t tq, bool legacy)
{
	
	// 通过传入的属性去获取属性信息
	dispatch_queue_attr_info_t dqai = _dispatch_queue_attr_to_info(dqa);

	//
	// Step 1: Normalize arguments (qos, overcommit, tq)
	//
	// 取出优先级
	dispatch_qos_t qos = dqai.dqai_qos;
#if !HAVE_PTHREAD_WORKQUEUE_QOS
	// DISPATCH_QOS_USER_INTERACTIVE   ((dispatch_qos_t)6)
	if (qos == DISPATCH_QOS_USER_INTERACTIVE) {
		dqai.dqai_qos = qos = DISPATCH_QOS_USER_INITIATED;
	}
	// DISPATCH_QOS_MAINTENANCE        ((dispatch_qos_t)1)
	if (qos == DISPATCH_QOS_MAINTENANCE) {
		dqai.dqai_qos = qos = DISPATCH_QOS_BACKGROUND;
	}
#endif // !HAVE_PTHREAD_WORKQUEUE_QOS
	// 一个标识符，表示是不是就算负荷很高了，但还是得给我新开一个线程出来给我执行任务。
	_dispatch_queue_attr_overcommit_t overcommit = dqai.dqai_overcommit;
	if (overcommit != _dispatch_queue_attr_overcommit_unspecified && tq) {
		if (tq->do_targetq) {
			DISPATCH_CLIENT_CRASH(tq, "Cannot specify both overcommit and "
					"a non-global target queue");
		}
	}
	// dispatch_queue_create函数中科院得知，tq传入的值为NULL
	if (tq && dx_type(tq) == DISPATCH_QUEUE_GLOBAL_ROOT_TYPE) {
		// Handle discrepancies between attr and target queue, attributes win
		if (overcommit == _dispatch_queue_attr_overcommit_unspecified) {
			if (tq->dq_priority & DISPATCH_PRIORITY_FLAG_OVERCOMMIT) {
				overcommit = _dispatch_queue_attr_overcommit_enabled;
			} else {
				overcommit = _dispatch_queue_attr_overcommit_disabled;
			}
		}
		if (qos == DISPATCH_QOS_UNSPECIFIED) {
			qos = _dispatch_priority_qos(tq->dq_priority);
		}
		tq = NULL;
	} else if (tq && !tq->do_targetq) {
		// target is a pthread or runloop root queue, setting QoS or overcommit
		// is disallowed
		if (overcommit != _dispatch_queue_attr_overcommit_unspecified) {
			DISPATCH_CLIENT_CRASH(tq, "Cannot specify an overcommit attribute "
					"and use this kind of target queue");
		}
	} else {
		// 如果overcommit没有被指定
		if (overcommit == _dispatch_queue_attr_overcommit_unspecified) {
			// Serial queues default to overcommit!
			// 所以对于overcommit，如果是串行的话默认是开启的，而并行是关闭的
			overcommit = dqai.dqai_concurrent ?
					_dispatch_queue_attr_overcommit_disabled :
					_dispatch_queue_attr_overcommit_enabled;
		}
	}
	// 之前说过初始化队列默认传了DISPATCH_TARGET_QUEUE_DEFAULT，也就是null，所以进入if语句。
	if (!tq) {
		// 获取一个管理自己队列的root队列。
		tq = _dispatch_get_root_queue(
				qos == DISPATCH_QOS_UNSPECIFIED ? DISPATCH_QOS_DEFAULT : qos, // 4
				overcommit == _dispatch_queue_attr_overcommit_enabled)->_as_dq; // 0 1
		if (unlikely(!tq)) {
			DISPATCH_CLIENT_CRASH(qos, "Invalid queue attribute");
		}
	}

	//
	// Step 2: Initialize the queue
	//
	// legacy默认是true
	if (legacy) {
		// if any of these attributes is specified, use non legacy classes
		// 默认是会给dqa_autorelease_frequency指定为DISPATCH_AUTORELEASE_FREQUENCY_INHERIT，所以这个判断式是成立的
		if (dqai.dqai_inactive || dqai.dqai_autorelease_frequency) {
			legacy = false;
		}
	}
	
	// vtable变量很重要，之后会被赋值到之前说的dispatch_queue_t结构体里的do_vtable变量上
	const void *vtable;
	dispatch_queue_flags_t dqf = legacy ? DQF_MUTABLE : 0;
	if (dqai.dqai_concurrent) {
		// 通过dqai.dqai_concurrent 来区分并发和串行
		// OS_dispatch_queue_concurrent_class
		vtable = DISPATCH_VTABLE(queue_concurrent);
	} else {
		vtable = DISPATCH_VTABLE(queue_serial);
	}
	switch (dqai.dqai_autorelease_frequency) {
	case DISPATCH_AUTORELEASE_FREQUENCY_NEVER:
		dqf |= DQF_AUTORELEASE_NEVER;
		break;
	case DISPATCH_AUTORELEASE_FREQUENCY_WORK_ITEM:
		dqf |= DQF_AUTORELEASE_ALWAYS;
		break;
	}
	// 名字赋值
	if (label) {
		const char *tmp = _dispatch_strdup_if_mutable(label);
		if (tmp != label) {
			dqf |= DQF_LABEL_NEEDS_FREE;
			label = tmp;
		}
	}

	// 开辟内存 - 生成响应的对象 queue
	dispatch_lane_t dq = _dispatch_object_alloc(vtable,
			sizeof(struct dispatch_lane_s));
	
	// 构造方法 根据dqai.dqai_concurrent来确定能开启多少条线程（）
	// DISPATCH_QUEUE_WIDTH_MAX  (DISPATCH_QUEUE_WIDTH_FULL - 2) = 0xffe
	_dispatch_queue_init(dq, dqf, dqai.dqai_concurrent ?
			DISPATCH_QUEUE_WIDTH_MAX : 1, DISPATCH_QUEUE_ROLE_INNER |
			(dqai.dqai_inactive ? DISPATCH_QUEUE_INACTIVE : 0));

	// 标签
	dq->dq_label = label;
	// 优先级
	dq->dq_priority = _dispatch_priority_make((dispatch_qos_t)dqai.dqai_qos,
			dqai.dqai_relpri);
	if (overcommit == _dispatch_queue_attr_overcommit_enabled) {
		dq->dq_priority |= DISPATCH_PRIORITY_FLAG_OVERCOMMIT;
	}
	if (!dqai.dqai_inactive) {
		_dispatch_queue_priority_inherit_from_target(dq, tq);
		_dispatch_lane_inherit_wlh_from_target(dq, tq);
	}
	_dispatch_retain(tq);
	// 自定义的queue的目标队列是root队列
	dq->do_targetq = tq;
	_dispatch_object_debug(dq, "%s", __func__);
	return _dispatch_trace_queue_create(dq)._dq;
}

拆解函数_dispatch_queue_attr_to_info，把dqa传进去判断当前要创建线程的信息

dispatch_queue_attr_info_t
_dispatch_queue_attr_to_info(dispatch_queue_attr_t dqa)
{
	dispatch_queue_attr_info_t dqai = { };
	// 如果属性为空，直接返回空的属性信息（串行队列）
	if (!dqa) return dqai;
	// 传入的属性不为空，则向属性信息里面赋值
#if DISPATCH_VARIANT_STATIC
	if (dqa == &_dispatch_queue_attr_concurrent) {
		dqai.dqai_concurrent = true;
		return dqai;
	}
#endif

	if (dqa < _dispatch_queue_attrs ||
			dqa >= &_dispatch_queue_attrs[DISPATCH_QUEUE_ATTR_COUNT]) {
		DISPATCH_CLIENT_CRASH(dqa->do_vtable, "Invalid queue attribute");
	}

	// 苹果的算法
	size_t idx = (size_t)(dqa - _dispatch_queue_attrs);

	// 位域
	// 0000 000000000 00000000000 0000 000  1
	// 对属性进行赋值，针对并发队列
	dqai.dqai_inactive = (idx % DISPATCH_QUEUE_ATTR_INACTIVE_COUNT);
	idx /= DISPATCH_QUEUE_ATTR_INACTIVE_COUNT;

	dqai.dqai_concurrent = !(idx % DISPATCH_QUEUE_ATTR_CONCURRENCY_COUNT);
	idx /= DISPATCH_QUEUE_ATTR_CONCURRENCY_COUNT;

	dqai.dqai_relpri = -(idx % DISPATCH_QUEUE_ATTR_PRIO_COUNT);
	idx /= DISPATCH_QUEUE_ATTR_PRIO_COUNT;

	dqai.dqai_qos = idx % DISPATCH_QUEUE_ATTR_QOS_COUNT;
	idx /= DISPATCH_QUEUE_ATTR_QOS_COUNT;

	dqai.dqai_autorelease_frequency =
			idx % DISPATCH_QUEUE_ATTR_AUTORELEASE_FREQUENCY_COUNT;
	idx /= DISPATCH_QUEUE_ATTR_AUTORELEASE_FREQUENCY_COUNT;

	dqai.dqai_overcommit = idx % DISPATCH_QUEUE_ATTR_OVERCOMMIT_COUNT;
	idx /= DISPATCH_QUEUE_ATTR_OVERCOMMIT_COUNT;

	return dqai;
}

创建一个root队列，_dispatch_get_root_queue函数。取root队列，一般是从一个装有12个root队列数组里面取。

DISPATCH_ALWAYS_INLINE DISPATCH_CONST
static inline dispatch_queue_global_t
_dispatch_get_root_queue(dispatch_qos_t qos, bool overcommit)
{
	if (unlikely(qos < DISPATCH_QOS_MIN || qos > DISPATCH_QOS_MAX)) {
		DISPATCH_CLIENT_CRASH(qos, "Corrupted priority");
	}
	// 4-1= 3
	// 2*3+0/1 = 6/7
	return &_dispatch_root_queues[2 * (qos - 1) + overcommit];
}

看下这个_dispatch_root_queues数组。我们可以看到，每一个优先级都有对应的root队列，每一个优先级又分为是不是可以过载的队列。

struct dispatch_queue_global_s _dispatch_root_queues[] = {
#define _DISPATCH_ROOT_QUEUE_IDX(n, flags) \
		((flags & DISPATCH_PRIORITY_FLAG_OVERCOMMIT) ? \
		DISPATCH_ROOT_QUEUE_IDX_##n##_QOS_OVERCOMMIT : \
		DISPATCH_ROOT_QUEUE_IDX_##n##_QOS)
#define _DISPATCH_ROOT_QUEUE_ENTRY(n, flags, ...) \
	[_DISPATCH_ROOT_QUEUE_IDX(n, flags)] = { \
		DISPATCH_GLOBAL_OBJECT_HEADER(queue_global), \
		.dq_state = DISPATCH_ROOT_QUEUE_STATE_INIT_VALUE, \
		.do_ctxt = _dispatch_root_queue_ctxt(_DISPATCH_ROOT_QUEUE_IDX(n, flags)), \
		.dq_atomic_flags = DQF_WIDTH(DISPATCH_QUEUE_WIDTH_POOL), \
		.dq_priority = flags | ((flags & DISPATCH_PRIORITY_FLAG_FALLBACK) ? \
				_dispatch_priority_make_fallback(DISPATCH_QOS_##n) : \
				_dispatch_priority_make(DISPATCH_QOS_##n, 0)), \
		__VA_ARGS__ \
	}
	_DISPATCH_ROOT_QUEUE_ENTRY(MAINTENANCE, 0,
		.dq_label = "com.apple.root.maintenance-qos",// 0
		.dq_serialnum = 4,
	),
	_DISPATCH_ROOT_QUEUE_ENTRY(MAINTENANCE, DISPATCH_PRIORITY_FLAG_OVERCOMMIT,
		.dq_label = "com.apple.root.maintenance-qos.overcommit",
		.dq_serialnum = 5,
	),
	_DISPATCH_ROOT_QUEUE_ENTRY(BACKGROUND, 0,
		.dq_label = "com.apple.root.background-qos",// 1
		.dq_serialnum = 6,
	),
	_DISPATCH_ROOT_QUEUE_ENTRY(BACKGROUND, DISPATCH_PRIORITY_FLAG_OVERCOMMIT,
		.dq_label = "com.apple.root.background-qos.overcommit",// 2
		.dq_serialnum = 7,
	),
	_DISPATCH_ROOT_QUEUE_ENTRY(UTILITY, 0,
		.dq_label = "com.apple.root.utility-qos",// 3
		.dq_serialnum = 8,
	),
	_DISPATCH_ROOT_QUEUE_ENTRY(UTILITY, DISPATCH_PRIORITY_FLAG_OVERCOMMIT,
		.dq_label = "com.apple.root.utility-qos.overcommit",// 4
		.dq_serialnum = 9,
	),
	_DISPATCH_ROOT_QUEUE_ENTRY(DEFAULT, DISPATCH_PRIORITY_FLAG_FALLBACK,
		.dq_label = "com.apple.root.default-qos",// 5
		.dq_serialnum = 10,
	),
	_DISPATCH_ROOT_QUEUE_ENTRY(DEFAULT,
			DISPATCH_PRIORITY_FLAG_FALLBACK | DISPATCH_PRIORITY_FLAG_OVERCOMMIT,
		.dq_label = "com.apple.root.default-qos.overcommit",// 6
		.dq_serialnum = 11,
	),
	_DISPATCH_ROOT_QUEUE_ENTRY(USER_INITIATED, 0,
		.dq_label = "com.apple.root.user-initiated-qos",// 7
		.dq_serialnum = 12,
	),
	_DISPATCH_ROOT_QUEUE_ENTRY(USER_INITIATED, DISPATCH_PRIORITY_FLAG_OVERCOMMIT,
		.dq_label = "com.apple.root.user-initiated-qos.overcommit",
		.dq_serialnum = 13,
	),
	_DISPATCH_ROOT_QUEUE_ENTRY(USER_INTERACTIVE, 0,
		.dq_label = "com.apple.root.user-interactive-qos",
		.dq_serialnum = 14,
	),
	_DISPATCH_ROOT_QUEUE_ENTRY(USER_INTERACTIVE, DISPATCH_PRIORITY_FLAG_OVERCOMMIT,
		.dq_label = "com.apple.root.user-interactive-qos.overcommit",
		.dq_serialnum = 15,
	),
};

然后看下DISPATCH_VTABLE这个宏定义，解开来就是&OS_dispatch_##name##_class，其实就是取dispatch_object_t对象

_dispatch_queue_init函数，这里做的是一些初始化工作，这里的width会根据串行还是并发去改变，自定义的串行队列是0x01，并发的是0xffe

static inline dispatch_queue_class_t
_dispatch_queue_init(dispatch_queue_class_t dqu, dispatch_queue_flags_t dqf,
		uint16_t width, uint64_t initial_state_bits)
{
	uint64_t dq_state = DISPATCH_QUEUE_STATE_INIT_VALUE(width);
	dispatch_queue_t dq = dqu._dq;

	dispatch_assert((initial_state_bits & ~(DISPATCH_QUEUE_ROLE_MASK |
			DISPATCH_QUEUE_INACTIVE)) == 0);

	if (initial_state_bits & DISPATCH_QUEUE_INACTIVE) {
		dq_state |= DISPATCH_QUEUE_INACTIVE + DISPATCH_QUEUE_NEEDS_ACTIVATION;
		dq->do_ref_cnt += 2; // rdar://8181908 see _dispatch_lane_resume
		if (dx_metatype(dq) == _DISPATCH_SOURCE_TYPE) {
			dq->do_ref_cnt++; // released when DSF_DELETED is set
		}
	}

	dq_state |= (initial_state_bits & DISPATCH_QUEUE_ROLE_MASK);
	// 指向DISPATCH_OBJECT_LISTLESS是优化编译器的作用。只是为了生成更好的指令让CPU更好的编码
	dq->do_next = DISPATCH_OBJECT_LISTLESS;
	dqf |= DQF_WIDTH(width);
	// dqf 保存进 dq->dq_atomic_flags
	os_atomic_store2o(dq, dq_atomic_flags, dqf, relaxed);
	dq->dq_state = dq_state;
	dq->dq_serialnum =
			os_atomic_inc_orig(&_dispatch_queue_serial_numbers, relaxed);
	return dqu;
}

最后是_dispatch_trace_queue_create函数，这里返回创建的队列对象

DISPATCH_ALWAYS_INLINE
static inline dispatch_queue_class_t
_dispatch_trace_queue_create(dispatch_queue_class_t dqu)
{
	_dispatch_only_if_ktrace_enabled({
		uint64_t dq_label[4] = {0}; // So that we get the right null termination
		dispatch_queue_t dq = dqu._dq;
		strncpy((char *)dq_label, (char *)dq->dq_label ?: "", sizeof(dq_label));

		_dispatch_ktrace2(DISPATCH_QOS_TRACE_queue_creation_start,
				dq->dq_serialnum,
				_dispatch_priority_to_pp_prefer_fallback(dq->dq_priority));

		_dispatch_ktrace4(DISPATCH_QOS_TRACE_queue_creation_end,
						dq_label[0], dq_label[1], dq_label[2], dq_label[3]);
	});

	return _dispatch_introspection_queue_create(dqu);
}

创建队列可以分以下几点

根据传入的dqa值去创建一个dispatch_queue_attr_info_t的对象dqai，这里面保存了队列的基本信息
根据dqai.dqai_concurrent去实例化vtable对象，这个对象给不同队列指定了push、wakeup等函数。这里的vtable是一个指针，指向获取到的dispatch_object_t对象
为新建的队列开辟内存，开辟内存的时候就把vtable传进去与队列绑定，初始化队列对象，根据dqai.dqai_concurrent去设置width的大小
设置队列对象的label
根据dqai.dqai_qos去设置队列优先级
获取一个管理自己队列的root队列，把它赋值给新队列的目标队列
返回新的队列

同步&异步运行

dispatch_sync

DISPATCH_NOINLINE
void
dispatch_sync(dispatch_queue_t dq, dispatch_block_t work)
{
	uintptr_t dc_flags = DC_FLAG_BLOCK;
	if (unlikely(_dispatch_block_has_private_data(work))) {
		return _dispatch_sync_block_with_privdata(dq, work, dc_flags);
	}
	_dispatch_sync_f(dq, work, _dispatch_Block_invoke(work), dc_flags);
}

DISPATCH_NOINLINE
static void
_dispatch_sync_f(dispatch_queue_t dq, void *ctxt, dispatch_function_t func,
		uintptr_t dc_flags)
{
	_dispatch_sync_f_inline(dq, ctxt, func, dc_flags);
}

重点函数在_dispatch_sync_f_inline，这里是串行队列、并发队列的入口

DISPATCH_ALWAYS_INLINE
static inline void
_dispatch_sync_f_inline(dispatch_queue_t dq, void *ctxt,
		dispatch_function_t func, uintptr_t dc_flags)
{
	
	// 串行 来到这里
	if (likely(dq->dq_width == 1)) {
		// 底层串行队列和栅栏函数实现方法一致
		return _dispatch_barrier_sync_f(dq, ctxt, func, dc_flags);
	}

	if (unlikely(dx_metatype(dq) != _DISPATCH_LANE_TYPE)) {
		DISPATCH_CLIENT_CRASH(0, "Queue type doesn't support dispatch_sync");
	}

	dispatch_lane_t dl = upcast(dq)._dl;
	// Global concurrent queues and queues bound to non-dispatch threads
	// always fall into the slow case, see DISPATCH_ROOT_QUEUE_STATE_INIT_VALUE
	// 全局获取的并发队列或者绑定的是非调度线程的队列会走进这个if分支
	if (unlikely(!_dispatch_queue_try_reserve_sync_width(dl))) {
		return _dispatch_sync_f_slow(dl, ctxt, func, 0, dl, dc_flags);
	}

	if (unlikely(dq->do_targetq->do_targetq)) {
		return _dispatch_sync_recurse(dl, ctxt, func, dc_flags);
	}
	_dispatch_introspection_sync_begin(dl);
	// 并发来到这里
	_dispatch_sync_invoke_and_complete(dl, ctxt, func DISPATCH_TRACE_ARG(
			_dispatch_trace_item_sync_push_pop(dq, ctxt, func, dc_flags)));
}

先来看一下串行队列会执行的_dispatch_barrier_sync_f

DISPATCH_NOINLINE
static void
_dispatch_barrier_sync_f(dispatch_queue_t dq, void *ctxt,
		dispatch_function_t func, uintptr_t dc_flags)
{
	_dispatch_barrier_sync_f_inline(dq, ctxt, func, dc_flags);
}

进入_dispatch_barrier_sync_f_inline函数里面

DISPATCH_ALWAYS_INLINE
static inline void
_dispatch_barrier_sync_f_inline(dispatch_queue_t dq, void *ctxt,
		dispatch_function_t func, uintptr_t dc_flags)
{
	// 获取线程ID -- mach pthread --
	dispatch_tid tid = _dispatch_tid_self();

	if (unlikely(dx_metatype(dq) != _DISPATCH_LANE_TYPE)) {
		DISPATCH_CLIENT_CRASH(0, "Queue type doesn't support dispatch_sync");
	}

	dispatch_lane_t dl = upcast(dq)._dl;
	// 当前线程尝试绑定获取串行队列的lock
	if (unlikely(!_dispatch_queue_try_acquire_barrier_sync(dl, tid))) {
		// 线程获取不到queue的lock，则串行入队等待，当前线程阻塞
		return _dispatch_sync_f_slow(dl, ctxt, func, DC_FLAG_BARRIER, dl,
				DC_FLAG_BARRIER | dc_flags);
	}

	if (unlikely(dl->do_targetq->do_targetq)) {
		return _dispatch_sync_recurse(dl, ctxt, func,
				DC_FLAG_BARRIER | dc_flags);
	}
	_dispatch_introspection_sync_begin(dl);
	// 不需要等待，则走这里
	_dispatch_lane_barrier_sync_invoke_and_complete(dl, ctxt, func
			DISPATCH_TRACE_ARG(_dispatch_trace_item_sync_push_pop(
					dq, ctxt, func, dc_flags | DC_FLAG_BARRIER)));
}

重点看一下线程是如何尝试获取串行队列lock的，这很重要，这一步是后面的死锁检测的基础

// _dispatch_queue_try_acquire_barrier_sync
DISPATCH_ALWAYS_INLINE DISPATCH_WARN_RESULT
static inline bool
_dispatch_queue_try_acquire_barrier_sync(dispatch_queue_class_t dq, uint32_t tid)
{
	return _dispatch_queue_try_acquire_barrier_sync_and_suspend(dq._dl, tid, 0);
}

//_dispatch_queue_try_acquire_barrier_sync_and_suspend
DISPATCH_ALWAYS_INLINE DISPATCH_WARN_RESULT
static inline bool
_dispatch_queue_try_acquire_barrier_sync_and_suspend(dispatch_lane_t dq,
		uint32_t tid, uint64_t suspend_count)
{
	uint64_t init  = DISPATCH_QUEUE_STATE_INIT_VALUE(dq->dq_width);
	// _dispatch_lock_value_from_tid 会去取tid二进制数的2到31位 作为值（从0位算起）
	uint64_t value = DISPATCH_QUEUE_WIDTH_FULL_BIT | DISPATCH_QUEUE_IN_BARRIER |
			_dispatch_lock_value_from_tid(tid) |
			(suspend_count * DISPATCH_QUEUE_SUSPEND_INTERVAL);
	uint64_t old_state, new_state;
	// 从底层获取当前队列的状态信息
	return os_atomic_rmw_loop2o(dq, dq_state, old_state, new_state, acquire, {
		uint64_t role = old_state & DISPATCH_QUEUE_ROLE_MASK;
		if (old_state != (init | role)) {
			os_atomic_rmw_loop_give_up(break);
		}
		new_state = value | role;
	});
}

上面代码会去取dispatch_queue_t中的dq_state值。当这个dq_state没有被别人修改过，即第一次被修改时，会将dq_state设置为new_state, 并返回true。此时，在new_state中标记了当前的queue被lock，同时记录了lock 当前queue的线程tid。

回到上一级代码中，进入到_dispatch_sync_f_slow函数

DISPATCH_ALWAYS_INLINE
static inline void
_dispatch_barrier_sync_f_inline(dispatch_queue_t dq, void *ctxt,
		dispatch_function_t func, uintptr_t dc_flags)
{
	// ...
	
	// 当前线程尝试绑定获取串行队列的lock
	if (unlikely(!_dispatch_queue_try_acquire_barrier_sync(dl, tid))) {
		// 线程获取不到queue的lock，则串行入队等待，当前线程阻塞
		return _dispatch_sync_f_slow(dl, ctxt, func, DC_FLAG_BARRIER, dl,
				DC_FLAG_BARRIER | dc_flags);
	}

	// ...
}

_dispatch_sync_f_slow(dispatch_queue_class_t top_dqu, void *ctxt,
		dispatch_function_t func, uintptr_t top_dc_flags,
		dispatch_queue_class_t dqu, uintptr_t dc_flags)
{
	dispatch_queue_t top_dq = top_dqu._dq;
	dispatch_queue_t dq = dqu._dq;
	// 如果目标队列不存在，则走入这个if里面，一般不会走
	if (unlikely(!dq->do_targetq)) {
		return _dispatch_sync_function_invoke(dq, ctxt, func);
	}
	// 获取优先级
	pthread_priority_t pp = _dispatch_get_priority();
	struct dispatch_sync_context_s dsc = {
		.dc_flags    = DC_FLAG_SYNC_WAITER | dc_flags,
		.dc_func     = _dispatch_async_and_wait_invoke,
		.dc_ctxt     = &dsc,
		.dc_other    = top_dq,
		.dc_priority = pp | _PTHREAD_PRIORITY_ENFORCE_FLAG,
		.dc_voucher  = _voucher_get(),
		.dsc_func    = func,
		.dsc_ctxt    = ctxt,
		.dsc_waiter  = _dispatch_tid_self(),
	};
	
	_dispatch_trace_item_push(top_dq, &dsc);
	// 死锁检测
	__DISPATCH_WAIT_FOR_QUEUE__(&dsc, dq);

	if (dsc.dsc_func == NULL) {
		dispatch_queue_t stop_dq = dsc.dc_other;
		return _dispatch_sync_complete_recurse(top_dq, stop_dq, top_dc_flags);
	}
	// 把队列置为一个开始的状态
	_dispatch_introspection_sync_begin(top_dq);
	// 把任务pop出来
	_dispatch_trace_item_pop(top_dq, &dsc);
	// 执行完成的回调
	_dispatch_sync_invoke_and_complete_recurse(top_dq, ctxt, func,top_dc_flags
			DISPATCH_TRACE_ARG(&dsc));
}

__DISPATCH_WAIT_FOR_QUEUE__函数

DISPATCH_NOINLINE
static void
__DISPATCH_WAIT_FOR_QUEUE__(dispatch_sync_context_t dsc, dispatch_queue_t dq)
{
	//等待dq
	uint64_t dq_state = _dispatch_wait_prepare(dq);
	// 检测是否会发生死锁，若会发生死锁，则直接crash
	if (unlikely(_dq_state_drain_locked_by(dq_state, dsc->dsc_waiter))) {
		DISPATCH_CLIENT_CRASH((uintptr_t)dq_state,
				"dispatch_sync called on queue "
				"already owned by current thread");
	}

	...
}

判断死锁的主要代码

DISPATCH_ALWAYS_INLINE
static inline bool
_dispatch_lock_is_locked_by(dispatch_lock lock_value, dispatch_tid tid)
{
	// lock_value就是dq_state,一个32位的整数。通过判断((lock_value ^ tid) & DLOCK_OWNER_MASK)是否为0，来判断当前的串行队列是否已被同一个线程所获取。如果当前队列已经被当前线程获取，即当前线程在执行一个串行任务中，如果此时我们在阻塞等待一个新的串行任务，则会发生死锁。
	// 当((lock_value ^ tid) & DLOCK_OWNER_MASK) == 0 时，就会主动触发crash来避免死锁
	return ((lock_value ^ tid) & DLOCK_OWNER_MASK) == 0;
}

再来看并行队列会走的分支：

DISPATCH_NOINLINE
static void
_dispatch_lane_barrier_sync_invoke_and_complete(dispatch_lane_t dq,
		void *ctxt, dispatch_function_t func DISPATCH_TRACE_ARG(void *dc))
{
	_dispatch_sync_function_invoke_inline(dq, ctxt, func);
	_dispatch_trace_item_complete(dc);
	if (unlikely(dq->dq_items_tail || dq->dq_width > 1)) {
		return _dispatch_lane_barrier_complete(dq, 0, 0);
	}

	// Presence of any of these bits requires more work that only
	// _dispatch_*_barrier_complete() handles properly
	//
	// Note: testing for RECEIVED_OVERRIDE or RECEIVED_SYNC_WAIT without
	// checking the role is sloppy, but is a super fast check, and neither of
	// these bits should be set if the lock was never contended/discovered.
	const uint64_t fail_unlock_mask = DISPATCH_QUEUE_SUSPEND_BITS_MASK |
			DISPATCH_QUEUE_ENQUEUED | DISPATCH_QUEUE_DIRTY |
			DISPATCH_QUEUE_RECEIVED_OVERRIDE | DISPATCH_QUEUE_SYNC_TRANSFER |
			DISPATCH_QUEUE_RECEIVED_SYNC_WAIT;
	uint64_t old_state, new_state;

	// similar to _dispatch_queue_drain_try_unlock
	os_atomic_rmw_loop2o(dq, dq_state, old_state, new_state, release, {
		new_state  = old_state - DISPATCH_QUEUE_SERIAL_DRAIN_OWNED;
		new_state &= ~DISPATCH_QUEUE_DRAIN_UNLOCK_MASK;
		new_state &= ~DISPATCH_QUEUE_MAX_QOS_MASK;
		if (unlikely(old_state & fail_unlock_mask)) {
			os_atomic_rmw_loop_give_up({
				return _dispatch_lane_barrier_complete(dq, 0, 0);
			});
		}
	});
	if (_dq_state_is_base_wlh(old_state)) {
		_dispatch_event_loop_assert_not_owned((dispatch_wlh_t)dq);
	}
}

DISPATCH_ALWAYS_INLINE
static inline void
_dispatch_sync_function_invoke_inline(dispatch_queue_class_t dq, void *ctxt,
		dispatch_function_t func)
{
	dispatch_thread_frame_s dtf;
	_dispatch_thread_frame_push(&dtf, dq); // 保护现场
	// f(ctxt) -- func(ctxt)
	_dispatch_client_callout(ctxt, func); // 执行回调
	_dispatch_perfmon_workitem_inc();
	_dispatch_thread_frame_pop(&dtf);
}

可见，并行队列不会创建线程取执行dispatch_sync命令

dispatch_async

无论dq是什么类型的queue，GCD首先会将work打包成dispatch_continuation_t 类型，然后调用方法_dispatch_continuation_async。

void
dispatch_async(dispatch_queue_t dq, dispatch_block_t work)
{
	// 创建dc dispatch_continuation_t：block 被封装成的 dispatch_continuation_t 结构
	dispatch_continuation_t dc = _dispatch_continuation_alloc();
	// 设置flag
	uintptr_t dc_flags = DC_FLAG_CONSUME;
	dispatch_qos_t qos;
	// 将work打包成dispatch_continuation_t
	qos = _dispatch_continuation_init(dc, dq, work, 0, dc_flags);
	_dispatch_continuation_async(dq, dc, qos, dc->dc_flags);
}

我们先看work是怎么打包成dispatch_continuation_t的

DISPATCH_ALWAYS_INLINE
static inline dispatch_qos_t
_dispatch_continuation_init(dispatch_continuation_t dc,
		dispatch_queue_class_t dqu, dispatch_block_t work,
		dispatch_block_flags_t flags, uintptr_t dc_flags)
{
	// 复制一个block
	void *ctxt = _dispatch_Block_copy(work);
	
	dc_flags |= DC_FLAG_BLOCK | DC_FLAG_ALLOCATED;
	if (unlikely(_dispatch_block_has_private_data(work))) {
		dc->dc_flags = dc_flags;
		dc->dc_ctxt = ctxt;
		// will initialize all fields but requires dc_flags & dc_ctxt to be set
		return _dispatch_continuation_init_slow(dc, dqu, flags);
	}

	dispatch_function_t func = _dispatch_Block_invoke(work);
	// 之前的flag 被设置为DISPATCH_OBJ_CONSUME_BIT，因此会走这里
	if (dc_flags & DC_FLAG_CONSUME) {
		// 这里是设置dc的功能函数：1. 执行block 2. release block对象
		func = _dispatch_call_block_and_release;
	}
	return _dispatch_continuation_init_f(dc, dqu, ctxt, func, flags, dc_flags);
}

这里主要关注的是_dispatch_call_block_and_release这个函数，这个函数主要是把dc的功能函数赋值

void
_dispatch_call_block_and_release(void *block)
{
	void (^b)(void) = block;
	b();
	Block_release(b);
}

// _dispatch_continuation_init_f函数
DISPATCH_ALWAYS_INLINE
static inline dispatch_qos_t
_dispatch_continuation_init_f(dispatch_continuation_t dc,
		dispatch_queue_class_t dqu, void *ctxt, dispatch_function_t f,
		dispatch_block_flags_t flags, uintptr_t dc_flags)
{
	// 传入的block会保存到dc->dc_func中
	pthread_priority_t pp = 0;
	dc->dc_flags = dc_flags | DC_FLAG_ALLOCATED;
	dc->dc_func = f;
	dc->dc_ctxt = ctxt;
	// in this context DISPATCH_BLOCK_HAS_PRIORITY means that the priority
	// should not be propagated, only taken from the handler if it has one
	if (!(flags & DISPATCH_BLOCK_HAS_PRIORITY)) {
		pp = _dispatch_priority_propagate();
	}
	_dispatch_continuation_voucher_set(dc, flags);
	return _dispatch_continuation_priority_set(dc, dqu, pp, flags);
}

接下来回到上一步，进入_dispatch_continuation_async函数

DISPATCH_ALWAYS_INLINE
static inline void
_dispatch_continuation_async(dispatch_queue_class_t dqu,
		dispatch_continuation_t dc, dispatch_qos_t qos, uintptr_t dc_flags)
{
#if DISPATCH_INTROSPECTION
	if (!(dc_flags & DC_FLAG_NO_INTROSPECTION)) {
		_dispatch_trace_item_push(dqu, dc);
	}
#else
	(void)dc_flags;
#endif
	return dx_push(dqu._dq, dc, qos);
}

接下来我们就分析dx_push这个宏

#define dx_push(x, y, z) dx_vtable(x)->dq_push(x, y, z)

可以看到这个宏是调用dx_vtable里面的属性函数dq_push，而这个dx_vtable就是我们前面创建队列时的那个vtable 我们寻找dq_push的赋值函数，为了方便接下来的探索，就直接用全局并发队列的dq_push赋值函数去探索

DISPATCH_NOINLINE
void
_dispatch_root_queue_push(dispatch_queue_global_t rq, dispatch_object_t dou,
		dispatch_qos_t qos)
{
    ...
	// 重点函数
	_dispatch_root_queue_push_inline(rq, dou, dou, 1);
}

DISPATCH_ALWAYS_INLINE
static inline void
_dispatch_root_queue_push_inline(dispatch_queue_global_t dq,
		dispatch_object_t _head, dispatch_object_t _tail, int n)
{
	struct dispatch_object_s *hd = _head._do, *tl = _tail._do;
	if (unlikely(os_mpsc_push_list(os_mpsc(dq, dq_items), hd, tl, do_next))) {
		// 重点函数
		return _dispatch_root_queue_poke(dq, n, 0);
	}
}

DISPATCH_NOINLINE
void
_dispatch_root_queue_poke(dispatch_queue_global_t dq, int n, int floor)
{
    ...
	// 重点函数
	return _dispatch_root_queue_poke_slow(dq, n, floor);
}

重点分析函数在_dispatch_root_queue_poke_slow

DISPATCH_NOINLINE
static void
_dispatch_root_queue_poke_slow(dispatch_queue_global_t dq, int n, int floor)
{
	int remaining = n;
	int r = ENOSYS;
	// 先初始化root queues 包括初始化XUN 的workqueue
	_dispatch_root_queues_init();
	_dispatch_debug_root_queue(dq, __func__);
	_dispatch_trace_runtime_event(worker_request, dq, (uint64_t)n);

#if !DISPATCH_USE_INTERNAL_WORKQUEUE
#if DISPATCH_USE_PTHREAD_ROOT_QUEUES
	// 如果dq的type是DISPATCH_QUEUE_GLOBAL_ROOT_TYPE类型
	if (dx_type(dq) == DISPATCH_QUEUE_GLOBAL_ROOT_TYPE)
#endif
	{
		_dispatch_root_queue_debug("requesting new worker thread for global "
				"queue: %p", dq);
		// 这里创建线程
		// 直接往工作队列添加线程
		r = _pthread_workqueue_addthreads(remaining,
				_dispatch_priority_to_pp_prefer_fallback(dq->dq_priority));
		(void)dispatch_assume_zero(r);
		return;
	}
#endif // !DISPATCH_USE_INTERNAL_WORKQUEUE
#if DISPATCH_USE_PTHREAD_POOL
	dispatch_pthread_root_queue_context_t pqc = dq->do_ctxt;
	if (likely(pqc->dpq_thread_mediator.do_vtable)) {
		while (dispatch_semaphore_signal(&pqc->dpq_thread_mediator)) {
			_dispatch_root_queue_debug("signaled sleeping worker for "
					"global queue: %p", dq);
			if (!--remaining) {
				return;
			}
		}
	}

	bool overcommit = dq->dq_priority & DISPATCH_PRIORITY_FLAG_OVERCOMMIT;
	if (overcommit) {
		os_atomic_add2o(dq, dgq_pending, remaining, relaxed);
	} else {
		if (!os_atomic_cmpxchg2o(dq, dgq_pending, 0, remaining, relaxed)) {
			_dispatch_root_queue_debug("worker thread request still pending for "
					"global queue: %p", dq);
			return;
		}
	}
	// 这个do while循环检测是否能开辟线程
	int can_request, t_count;
	// seq_cst with atomic store to tail <rdar://problem/16932833>
	// t_count表示现有的线程个数
	t_count = os_atomic_load2o(dq, dgq_thread_pool_size, ordered);
	do {
		// 如果现有的线程个数小于线程池的大小
		can_request = t_count < floor ? 0 : t_count - floor;
		// 如果当前要产生的大小大于可以产生的大小
		if (remaining > can_request) {
			// 容积崩溃
			_dispatch_root_queue_debug("pthread pool reducing request from %d to %d",
					remaining, can_request);
			os_atomic_sub2o(dq, dgq_pending, remaining - can_request, relaxed);
			remaining = can_request;
		}
		if (remaining == 0) {
			// 线程池满了，崩溃
			_dispatch_root_queue_debug("pthread pool is full for root queue: "
					"%p", dq);
			return;
		}
	} while (!os_atomic_cmpxchgvw2o(dq, dgq_thread_pool_size, t_count,
			t_count - remaining, &t_count, acquire));

	pthread_attr_t *attr = &pqc->dpq_thread_attr;
	pthread_t tid, *pthr = &tid;
#if DISPATCH_USE_MGR_THREAD && DISPATCH_USE_PTHREAD_ROOT_QUEUES
	if (unlikely(dq == &_dispatch_mgr_root_queue)) {
		pthr = _dispatch_mgr_root_queue_init();
	}
#endif
	// 开辟线程
	do {
		_dispatch_retain(dq); // released in _dispatch_worker_thread
		// 根据remaining大小去创建线程 这里是普通的并发队列创建线程
		while ((r = pthread_create(pthr, attr, _dispatch_worker_thread, dq))) {
			if (r != EAGAIN) {
				(void)dispatch_assume_zero(r);
			}
			_dispatch_temporary_resource_shortage();
		}
	} while (--remaining);
#else
	(void)floor;
#endif // DISPATCH_USE_PTHREAD_POOL
}

这里面就是线程的开辟方法，重点方法在_dispatch_root_queues_init函数

DISPATCH_ALWAYS_INLINE
static inline void
_dispatch_root_queues_init(void)
{
    // 只执行一次
	dispatch_once_f(&_dispatch_root_queues_pred, NULL,
			_dispatch_root_queues_init_once);
}

_dispatch_root_queues_init_once函数，主要函数在_dispatch_worker_thread2

static void
_dispatch_root_queues_init_once(void *context DISPATCH_UNUSED)
{
	_dispatch_fork_becomes_unsafe();
#if DISPATCH_USE_INTERNAL_WORKQUEUE
	size_t i;
	for (i = 0; i < DISPATCH_ROOT_QUEUE_COUNT; i++) {
		_dispatch_root_queue_init_pthread_pool(&_dispatch_root_queues[i], 0,
				_dispatch_root_queues[i].dq_priority);
	}
#else
	int wq_supported = _pthread_workqueue_supported();
	int r = ENOTSUP;

	if (!(wq_supported & WORKQ_FEATURE_MAINTENANCE)) {
		DISPATCH_INTERNAL_CRASH(wq_supported,
				"QoS Maintenance support required");
	}

	if (unlikely(!_dispatch_kevent_workqueue_enabled)) {
		r = _pthread_workqueue_init(_dispatch_worker_thread2,
				offsetof(struct dispatch_queue_s, dq_serialnum), 0);
#if DISPATCH_USE_KEVENT_WORKQUEUE
	} else if (wq_supported & WORKQ_FEATURE_KEVENT) {
		r = _pthread_workqueue_init_with_kevent(_dispatch_worker_thread2,
				(pthread_workqueue_function_kevent_t)
				_dispatch_kevent_worker_thread,
				offsetof(struct dispatch_queue_s, dq_serialnum), 0);
#endif
	} else {
		DISPATCH_INTERNAL_CRASH(wq_supported, "Missing Kevent WORKQ support");
	}

	if (r != 0) {
		DISPATCH_INTERNAL_CRASH((r << 16) | wq_supported,
				"Root queue initialization failed");
	}
#endif // DISPATCH_USE_INTERNAL_WORKQUEUE
}

_dispatch_worker_thread2函数，重点函数在_dispatch_root_queue_drain

static void
_dispatch_worker_thread2(pthread_priority_t pp)
{
	bool overcommit = pp & _PTHREAD_PRIORITY_OVERCOMMIT_FLAG;
	dispatch_queue_global_t dq;

	pp &= _PTHREAD_PRIORITY_OVERCOMMIT_FLAG | ~_PTHREAD_PRIORITY_FLAGS_MASK;
	_dispatch_thread_setspecific(dispatch_priority_key, (void *)(uintptr_t)pp);
	dq = _dispatch_get_root_queue(_dispatch_qos_from_pp(pp), overcommit);

	_dispatch_introspection_thread_add();
	_dispatch_trace_runtime_event(worker_unpark, dq, 0);

	int pending = os_atomic_dec2o(dq, dgq_pending, relaxed);
	dispatch_assert(pending >= 0);
	// 开始调用_dispatch_root_queue_drain函数，取出任务
	_dispatch_root_queue_drain(dq, dq->dq_priority,
			DISPATCH_INVOKE_WORKER_DRAIN | DISPATCH_INVOKE_REDIRECTING_DRAIN);
	_dispatch_voucher_debug("root queue clear", NULL);
	_dispatch_reset_voucher(NULL, DISPATCH_THREAD_PARK);
	_dispatch_trace_runtime_event(worker_park, NULL, 0);
}

_dispatch_root_queue_drain函数，循环取出任务，重点函数_dispatch_continuation_pop_inline

DISPATCH_NOT_TAIL_CALLED // prevent tailcall (for Instrument DTrace probe)
static void
_dispatch_root_queue_drain(dispatch_queue_global_t dq,
		dispatch_priority_t pri, dispatch_invoke_flags_t flags)
{
#if DISPATCH_DEBUG
	dispatch_queue_t cq;
	if (unlikely(cq = _dispatch_queue_get_current())) {
		DISPATCH_INTERNAL_CRASH(cq, "Premature thread recycling");
	}
#endif
	_dispatch_queue_set_current(dq);
	_dispatch_init_basepri(pri);
	_dispatch_adopt_wlh_anon();

	struct dispatch_object_s *item;
	bool reset = false;
	dispatch_invoke_context_s dic = { };
#if DISPATCH_COCOA_COMPAT
	_dispatch_last_resort_autorelease_pool_push(&dic);
#endif // DISPATCH_COCOA_COMPAT
	_dispatch_queue_drain_init_narrowing_check_deadline(&dic, pri);
	_dispatch_perfmon_start();
	// 循环取出任务
	while (likely(item = _dispatch_root_queue_drain_one(dq))) {
		if (reset) _dispatch_wqthread_override_reset();
		_dispatch_continuation_pop_inline(item, &dic, flags, dq);
		reset = _dispatch_reset_basepri_override();
		if (unlikely(_dispatch_queue_drain_should_narrow(&dic))) {
			break;
		}
	}

	// overcommit or not. worker thread
	if (pri & DISPATCH_PRIORITY_FLAG_OVERCOMMIT) {
		_dispatch_perfmon_end(perfmon_thread_worker_oc);
	} else {
		_dispatch_perfmon_end(perfmon_thread_worker_non_oc);
	}

#if DISPATCH_COCOA_COMPAT
	_dispatch_last_resort_autorelease_pool_pop(&dic);
#endif // DISPATCH_COCOA_COMPAT
	_dispatch_reset_wlh();
	_dispatch_clear_basepri();
	_dispatch_queue_set_current(NULL);
}

_dispatch_continuation_pop_inline函数，执行dx_invoke，调度出任务的执行函数

DISPATCH_ALWAYS_INLINE_NDEBUG
static inline void
_dispatch_continuation_pop_inline(dispatch_object_t dou,
		dispatch_invoke_context_t dic, dispatch_invoke_flags_t flags,
		dispatch_queue_class_t dqu)
{
	dispatch_pthread_root_queue_observer_hooks_t observer_hooks =
			_dispatch_get_pthread_root_queue_observer_hooks();
	if (observer_hooks) observer_hooks->queue_will_execute(dqu._dq);
	flags &= _DISPATCH_INVOKE_PROPAGATE_MASK;
	if (_dispatch_object_has_vtable(dou)) {
	    // 调度出任务的执行函数
		dx_invoke(dou._dq, dic, flags);
	} else {
		_dispatch_continuation_invoke_inline(dou, flags, dqu);
	}
	if (observer_hooks) observer_hooks->queue_did_execute(dqu._dq);
}

这里有两个分支，一个是自定义队列的，一个是全局并发队列的，自定义队列走_dispatch_async_redirect_invoke，全局并发队列走_dispatch_queue_override_invoke

// _dispatch_async_redirect_invoke
void _dispatch_async_redirect_invoke(dispatch_continuation_t dc,
        dispatch_invoke_context_t dic, dispatch_invoke_flags_t flags)
{
    dispatch_thread_frame_s dtf;
    struct dispatch_continuation_s *other_dc = dc->dc_other;
    dispatch_invoke_flags_t ctxt_flags = (dispatch_invoke_flags_t)dc->dc_ctxt;

    dispatch_queue_t assumed_rq = (dispatch_queue_t)dc->dc_func;
    dispatch_queue_t dq = dc->dc_data, rq, old_dq;
    dispatch_priority_t old_dbp;

    if (ctxt_flags) {
        flags &= ~_DISPATCH_INVOKE_AUTORELEASE_MASK;
        flags |= ctxt_flags;
    }
    old_dq = _dispatch_get_current_queue();
    if (assumed_rq) {
        old_dbp = _dispatch_root_queue_identity_assume(assumed_rq);
        _dispatch_set_basepri(dq->dq_priority);
    } else {
        old_dbp = _dispatch_set_basepri(dq->dq_priority);
    }

    _dispatch_thread_frame_push(&dtf, dq);
    // _dispatch_continuation_pop_forwarded里面就是执行_dispatch_continuation_pop函数
    _dispatch_continuation_pop_forwarded(dc, DISPATCH_NO_VOUCHER,
            DISPATCH_OBJ_CONSUME_BIT, {
        _dispatch_continuation_pop(other_dc, dic, flags, dq);
    });
    _dispatch_thread_frame_pop(&dtf);
    if (assumed_rq) _dispatch_queue_set_current(old_dq);
    _dispatch_reset_basepri(old_dbp);

    rq = dq->do_targetq;
    while (slowpath(rq->do_targetq) && rq != old_dq) {
        _dispatch_queue_non_barrier_complete(rq);
        rq = rq->do_targetq;
    }

    _dispatch_queue_non_barrier_complete(dq);
    _dispatch_release_tailcall(dq); 
}

// _dispatch_queue_override_invoke
void _dispatch_queue_override_invoke(dispatch_continuation_t dc,
        dispatch_invoke_context_t dic, dispatch_invoke_flags_t flags)
{
    dispatch_queue_t old_rq = _dispatch_queue_get_current();
    dispatch_queue_t assumed_rq = dc->dc_other;
    dispatch_priority_t old_dp;
    voucher_t ov = DISPATCH_NO_VOUCHER;
    dispatch_object_t dou;

    dou._do = dc->dc_data;
    old_dp = _dispatch_root_queue_identity_assume(assumed_rq);
    if (dc_type(dc) == DISPATCH_CONTINUATION_TYPE(OVERRIDE_STEALING)) {
        flags |= DISPATCH_INVOKE_STEALING;
    } else {
        // balance the fake continuation push in
        // _dispatch_root_queue_push_override
        _dispatch_trace_continuation_pop(assumed_rq, dou._do);
    }
    // 同样调用_dispatch_continuation_pop函数
    _dispatch_continuation_pop_forwarded(dc, ov, DISPATCH_OBJ_CONSUME_BIT, {
        if (_dispatch_object_has_vtable(dou._do)) {
            dx_invoke(dou._do, dic, flags);
        } else {
            _dispatch_continuation_invoke_inline(dou, ov, flags);
        }
    });
    _dispatch_reset_basepri(old_dp);
    _dispatch_queue_set_current(old_rq);
}

最终都是调用_dispatch_continuation_pop这个函数

void
_dispatch_continuation_pop(dispatch_object_t dou, dispatch_invoke_context_t dic,
		dispatch_invoke_flags_t flags, dispatch_queue_class_t dqu)
{
	_dispatch_continuation_pop_inline(dou, dic, flags, dqu._dq);
}

DISPATCH_ALWAYS_INLINE_NDEBUG
static inline void
_dispatch_continuation_pop_inline(dispatch_object_t dou,
		dispatch_invoke_context_t dic, dispatch_invoke_flags_t flags,
		dispatch_queue_class_t dqu)
{
	dispatch_pthread_root_queue_observer_hooks_t observer_hooks =
			_dispatch_get_pthread_root_queue_observer_hooks();
	if (observer_hooks) observer_hooks->queue_will_execute(dqu._dq);
	flags &= _DISPATCH_INVOKE_PROPAGATE_MASK;
	if (_dispatch_object_has_vtable(dou)) {
		dx_invoke(dou._dq, dic, flags);
	} else {
		_dispatch_continuation_invoke_inline(dou, flags, dqu);
	}
	if (observer_hooks) observer_hooks->queue_did_execute(dqu._dq);
}

执行_dispatch_client_callout函数

static inline void
_dispatch_continuation_invoke_inline(dispatch_object_t dou,
		dispatch_invoke_flags_t flags, dispatch_queue_class_t dqu)
{
	dispatch_continuation_t dc = dou._dc, dc1;
	dispatch_invoke_with_autoreleasepool(flags, {
		uintptr_t dc_flags = dc->dc_flags;
		_dispatch_continuation_voucher_adopt(dc, dc_flags);
		if (!(dc_flags & DC_FLAG_NO_INTROSPECTION)) {
			_dispatch_trace_item_pop(dqu, dou);
		}
		if (dc_flags & DC_FLAG_CONSUME) {
			dc1 = _dispatch_continuation_free_cacheonly(dc);
		} else {
			dc1 = NULL;
		}
		if (unlikely(dc_flags & DC_FLAG_GROUP_ASYNC)) {
			_dispatch_continuation_with_group_invoke(dc);
		} else {
			// 直接执行block函数
			_dispatch_client_callout(dc->dc_ctxt, dc->dc_func);
			_dispatch_trace_item_complete(dc);
		}
		if (unlikely(dc1)) {
			_dispatch_continuation_free_to_cache_limit(dc1);
		}
	});
	_dispatch_perfmon_workitem_inc();
}

单例

单例的使用，只执行一次

static dispatch_once_t onceToken;
dispatch_once(&onceToken, ^{
    
});

源码分析

void
dispatch_once_f(dispatch_once_t *val, void *ctxt, dispatch_function_t func)
{
	// 如果你来过一次 -- 下次就不来
	dispatch_once_gate_t l = (dispatch_once_gate_t)val;
	//DLOCK_ONCE_DONE
#if !DISPATCH_ONCE_INLINE_FASTPATH || DISPATCH_ONCE_USE_QUIESCENT_COUNTER
	uintptr_t v = os_atomic_load(&l->dgo_once, acquire);
	// 第二次就直接返回
	if (likely(v == DLOCK_ONCE_DONE)) {
		return;
	}
#if DISPATCH_ONCE_USE_QUIESCENT_COUNTER
	if (likely(DISPATCH_ONCE_IS_GEN(v))) {
		return _dispatch_once_mark_done_if_quiesced(l, v);
	}
#endif
#endif

	// 满足条件 -- 试图进去
	if (_dispatch_once_gate_tryenter(l)) {
		// 单利调用 -- v->DLOCK_ONCE_DONE
		return _dispatch_once_callout(l, ctxt, func);
	}
	return _dispatch_once_wait(l);
}

拆解一下函数，首先看下_dispatch_once_gate_tryenter函数

static inline bool
_dispatch_once_gate_tryenter(dispatch_once_gate_t l)
{
	// os 对象是否存储过，如果没有，则把DLOCK_ONCE_UNLOCKED赋值给它
	return os_atomic_cmpxchg(&l->dgo_once, DLOCK_ONCE_UNLOCKED,
			(uintptr_t)_dispatch_lock_value_for_self(), relaxed);
}

然后看下_dispatch_once_callout函数

static void
_dispatch_once_callout(dispatch_once_gate_t l, void *ctxt,
		dispatch_function_t func)
{
	// block() -- 三遍
	_dispatch_client_callout(ctxt, func);
	// 广播一次，告诉别人执行过了
	_dispatch_once_gate_broadcast(l);
}

// _dispatch_once_gate_broadcast
static inline void
_dispatch_once_gate_broadcast(dispatch_once_gate_t l)
{
	dispatch_lock value_self = _dispatch_lock_value_for_self();
	uintptr_t v;
#if DISPATCH_ONCE_USE_QUIESCENT_COUNTER
	// 正在操作
	v = _dispatch_once_mark_quiescing(l);
#else
	// 操作完成
	v = _dispatch_once_mark_done(l);
#endif
	if (likely((dispatch_lock)v == value_self)) return;
	_dispatch_gate_broadcast_slow(&l->dgo_gate, (dispatch_lock)v);
}

// _dispatch_once_mark_done
DISPATCH_ALWAYS_INLINE
static inline uintptr_t
_dispatch_once_mark_done(dispatch_once_gate_t dgo)
{
    // 赋值 DLOCK_ONCE_DONE 给dgo->dgo_once
	return os_atomic_xchg(&dgo->dgo_once, DLOCK_ONCE_DONE, release);
}

信号量

信号量的使用

// 信号量创建
dispatch_semaphore_t sem = dispatch_semaphore_create(1);
// 信号量 -1
dispatch_wait(sem, DISPATCH_TIME_FOREVER);
// 信号量 +1
dispatch_semaphore_signal(sem);

源码分析，dispatch_semaphore_create函数

dispatch_semaphore_t
dispatch_semaphore_create(long value)
{
	dispatch_semaphore_t dsema;

	// If the internal value is negative, then the absolute of the value is
	// equal to the number of waiting threads. Therefore it is bogus to
	// initialize the semaphore with a negative value.
	if (value < 0) {
		return DISPATCH_BAD_INPUT;
	}
	// 创建信号量
	dsema = _dispatch_object_alloc(DISPATCH_VTABLE(semaphore),
			sizeof(struct dispatch_semaphore_s));
	dsema->do_next = DISPATCH_OBJECT_LISTLESS;
	dsema->do_targetq = _dispatch_get_default_queue(false);
	dsema->dsema_value = value; // 赋值
	_dispatch_sema4_init(&dsema->dsema_sema, _DSEMA4_POLICY_FIFO);
	dsema->dsema_orig = value;
	return dsema;
}

dispatch_wait宏，这里分三种情况，我们研究dispatch_semaphore_wait这种情况

#define dispatch_wait(object, timeout) \
		_Generic((object), \
			dispatch_block_t:dispatch_block_wait, \
			dispatch_group_t:dispatch_group_wait, \
			dispatch_semaphore_t:dispatch_semaphore_wait \
		)((object),(timeout))
		
// dispatch_semaphore_wait函数
long
dispatch_semaphore_wait(dispatch_semaphore_t dsema, dispatch_time_t timeout)
{
	// 0-1
	long value = os_atomic_dec2o(dsema, dsema_value, acquire);
	if (likely(value >= 0)) {
		return 0;
	}
	return _dispatch_semaphore_wait_slow(dsema, timeout);
}

// os_atomic_dec2o一直路由得到的就是这个，说白了就是把这些传入的值拼接起来，目的就是为了信号量的值-1
#define os_atomic_sub(p, v, m) \
		_os_atomic_c11_op((p), (v), m, sub, -)

// _dispatch_semaphore_wait_slow
DISPATCH_NOINLINE
static long
_dispatch_semaphore_wait_slow(dispatch_semaphore_t dsema,
		dispatch_time_t timeout)
{
	long orig;

	_dispatch_sema4_create(&dsema->dsema_sema, _DSEMA4_POLICY_FIFO);
	switch (timeout) {
	default:
		if (!_dispatch_sema4_timedwait(&dsema->dsema_sema, timeout)) {
			break;
		}
		// Fall through and try to undo what the fast path did to
		// dsema->dsema_value
	case DISPATCH_TIME_NOW:
		orig = dsema->dsema_value;
		while (orig < 0) {
			if (os_atomic_cmpxchgvw2o(dsema, dsema_value, orig, orig + 1,
					&orig, relaxed)) {
				return _DSEMA4_TIMEOUT();
			}
		}
		// Another thread called semaphore_signal().
		// Fall through and drain the wakeup.
		// 等待信号量，调用semaphore_signal函数时
	case DISPATCH_TIME_FOREVER:
		_dispatch_sema4_wait(&dsema->dsema_sema);
		break;
	}
	return 0;
}

dispatch_semaphore_signal函数，信号量加一

long
dispatch_semaphore_signal(dispatch_semaphore_t dsema)
{
    // 这里拼接起来就是为了信号量的值+1
	long value = os_atomic_inc2o(dsema, dsema_value, release);
	if (likely(value > 0)) {
		return 0;
	}
	if (unlikely(value == LONG_MIN)) {
		DISPATCH_CLIENT_CRASH(value,
				"Unbalanced call to dispatch_semaphore_signal()");
	}
	// 这里信号量加一
	return _dispatch_semaphore_signal_slow(dsema);
}

// _dispatch_semaphore_signal_slow
long
_dispatch_semaphore_signal_slow(dispatch_semaphore_t dsema)
{
	_dispatch_sema4_create(&dsema->dsema_sema, _DSEMA4_POLICY_FIFO);
	_dispatch_sema4_signal(&dsema->dsema_sema, 1);
	return 1;
}

调度组

首先从创建函数dispatch_group_create看起

dispatch_group_t
dispatch_group_create(void)
{
	return _dispatch_group_create_with_count(0);
}

DISPATCH_ALWAYS_INLINE
static inline dispatch_group_t
_dispatch_group_create_with_count(uint32_t n)
{
	dispatch_group_t dg = _dispatch_object_alloc(DISPATCH_VTABLE(group),
			sizeof(struct dispatch_group_s));
	dg->do_next = DISPATCH_OBJECT_LISTLESS;
	// 设置目标队列
	dg->do_targetq = _dispatch_get_default_queue(false);
	if (n) {
		os_atomic_store2o(dg, dg_bits,
				-n * DISPATCH_GROUP_VALUE_INTERVAL, relaxed);
		os_atomic_store2o(dg, do_ref_cnt, 1, relaxed); // <rdar://22318411>
	}
	return dg;
}

再看dispatch_group_async函数

dispatch_group_async

void
dispatch_group_async(dispatch_group_t dg, dispatch_queue_t dq,
		dispatch_block_t db)
{
	dispatch_continuation_t dc = _dispatch_continuation_alloc();
	uintptr_t dc_flags = DC_FLAG_CONSUME | DC_FLAG_GROUP_ASYNC;
	dispatch_qos_t qos;

	// 将work打包成dispatch_continuation_t
	qos = _dispatch_continuation_init(dc, dq, db, 0, dc_flags);
	_dispatch_continuation_group_async(dg, dq, dc, qos);
}

_dispatch_continuation_init函数我们上面分析过了，所以我们就分析_dispatch_continuation_group_async

DISPATCH_ALWAYS_INLINE
static inline void
_dispatch_continuation_group_async(dispatch_group_t dg, dispatch_queue_t dq,
		dispatch_continuation_t dc, dispatch_qos_t qos)
{
	dispatch_group_enter(dg);
	dc->dc_data = dg;
	_dispatch_continuation_async(dq, dc, qos, dc->dc_flags);
}

其实dispatch_group_async内部也是加了dispatch_group_enter函数

后面取出执行block逻辑跟dispatch_async略微不同，前面部分不做多说，调用顺序跟dispatch_async是一样的，唯一不同在于_dispatch_continuation_invoke_inline这个函数。

DISPATCH_ALWAYS_INLINE
static inline void
_dispatch_continuation_invoke_inline(dispatch_object_t dou,
		dispatch_invoke_flags_t flags, dispatch_queue_class_t dqu)
{
	dispatch_continuation_t dc = dou._dc, dc1;
	dispatch_invoke_with_autoreleasepool(flags, {
		uintptr_t dc_flags = dc->dc_flags;
		// Add the item back to the cache before calling the function. This
		// allows the 'hot' continuation to be used for a quick callback.
		//
		// The ccache version is per-thread.
		// Therefore, the object has not been reused yet.
		// This generates better assembly.
		_dispatch_continuation_voucher_adopt(dc, dc_flags);
		if (!(dc_flags & DC_FLAG_NO_INTROSPECTION)) {
			_dispatch_trace_item_pop(dqu, dou);
		}
		if (dc_flags & DC_FLAG_CONSUME) {
			dc1 = _dispatch_continuation_free_cacheonly(dc);
		} else {
			dc1 = NULL;
		}
		if (unlikely(dc_flags & DC_FLAG_GROUP_ASYNC)) {
			// 如果是调度组则走这里
			_dispatch_continuation_with_group_invoke(dc);
		} else {
			// 直接执行block函数
			_dispatch_client_callout(dc->dc_ctxt, dc->dc_func);
			_dispatch_trace_item_complete(dc);
		}
		if (unlikely(dc1)) {
			_dispatch_continuation_free_to_cache_limit(dc1);
		}
	});
	_dispatch_perfmon_workitem_inc();
}

// _dispatch_continuation_with_group_invoke
DISPATCH_ALWAYS_INLINE
static inline void
_dispatch_continuation_with_group_invoke(dispatch_continuation_t dc)
{
	struct dispatch_object_s *dou = dc->dc_data;
	unsigned long type = dx_type(dou);
	if (type == DISPATCH_GROUP_TYPE) {
		// 执行block
		_dispatch_client_callout(dc->dc_ctxt, dc->dc_func);
		_dispatch_trace_item_complete(dc);
		// 调用dispatch_group_leave
		dispatch_group_leave((dispatch_group_t)dou);
	} else {
		DISPATCH_INTERNAL_CRASH(dx_type(dou), "Unexpected object type");
	}
}

我们分析下dispatch_group_enter和dispatch_group_leave

dispatch_group_enter调用和信号量的方式很相似，就调用os_atomic_sub_orig2o把value值-1

// dispatch_group_enter
void
dispatch_group_enter(dispatch_group_t dg)
{
	// The value is decremented on a 32bits wide atomic so that the carry
	// for the 0 -> -1 transition is not propagated to the upper 32bits.
	uint32_t old_bits = os_atomic_sub_orig2o(dg, dg_bits,
			DISPATCH_GROUP_VALUE_INTERVAL, acquire);
	uint32_t old_value = old_bits & DISPATCH_GROUP_VALUE_MASK;
	if (unlikely(old_value == 0)) {
		_dispatch_retain(dg); // <rdar://problem/22318411>
	}
	if (unlikely(old_value == DISPATCH_GROUP_VALUE_MAX)) {
		DISPATCH_CLIENT_CRASH(old_bits,
				"Too many nested calls to dispatch_group_enter()");
	}
}

dispatch_group_leave函数，主要函数在_dispatch_group_wake

void
dispatch_group_leave(dispatch_group_t dg)
{
	// The value is incremented on a 64bits wide atomic so that the carry for
	// the -1 -> 0 transition increments the generation atomically.
	// 调用os_atomic_add_orig2o把value +1
	uint64_t new_state, old_state = os_atomic_add_orig2o(dg, dg_state,
			DISPATCH_GROUP_VALUE_INTERVAL, release);
	uint32_t old_value = (uint32_t)(old_state & DISPATCH_GROUP_VALUE_MASK);
	// 如果old_value等于DISPATCH_GROUP_VALUE_1这个值，才往下面走
	if (unlikely(old_value == DISPATCH_GROUP_VALUE_1)) {
		old_state += DISPATCH_GROUP_VALUE_INTERVAL;
		do {
			new_state = old_state;
			if ((old_state & DISPATCH_GROUP_VALUE_MASK) == 0) {
				new_state &= ~DISPATCH_GROUP_HAS_WAITERS;
				new_state &= ~DISPATCH_GROUP_HAS_NOTIFS;
			} else {
				// If the group was entered again since the atomic_add above,
				// we can't clear the waiters bit anymore as we don't know for
				// which generation the waiters are for
				new_state &= ~DISPATCH_GROUP_HAS_NOTIFS;
			}
			if (old_state == new_state) break;
		} while (unlikely(!os_atomic_cmpxchgv2o(dg, dg_state,
				old_state, new_state, &old_state, relaxed)));
		// 调用_dispatch_group_wake
		return _dispatch_group_wake(dg, old_state, true);
	}
	// 如果old_value经过操作还是等于0，则carsh
	if (unlikely(old_value == 0)) {
		DISPATCH_CLIENT_CRASH((uintptr_t)old_value,
				"Unbalanced call to dispatch_group_leave()");
	}
}

DISPATCH_NOINLINE
static void
_dispatch_group_wake(dispatch_group_t dg, uint64_t dg_state, bool needs_release)
{
	uint16_t refs = needs_release ? 1 : 0; // <rdar://problem/22318411>
	// 如果调度组里面有通知函数，则走进去
	if (dg_state & DISPATCH_GROUP_HAS_NOTIFS) {
		dispatch_continuation_t dc, next_dc, tail;

		// Snapshot before anything is notified/woken <rdar://problem/8554546>
		dc = os_mpsc_capture_snapshot(os_mpsc(dg, dg_notify), &tail);
		do {
			dispatch_queue_t dsn_queue = (dispatch_queue_t)dc->dc_data;
			next_dc = os_mpsc_pop_snapshot_head(dc, tail, do_next);
			// 执行队列里面的任务
			_dispatch_continuation_async(dsn_queue, dc,
					_dispatch_qos_from_pp(dc->dc_priority), dc->dc_flags);
			_dispatch_release(dsn_queue);
		} while ((dc = next_dc));

		refs++;
	}

	if (dg_state & DISPATCH_GROUP_HAS_WAITERS) {
		_dispatch_wake_by_address(&dg->dg_gen);
	}

	if (refs) _dispatch_release_n(dg, refs);
}

下面我们分析dispatch_group_notify函数，主要分析_dispatch_group_notify

void
dispatch_group_notify(dispatch_group_t dg, dispatch_queue_t dq,
		dispatch_block_t db)
{
	dispatch_continuation_t dsn = _dispatch_continuation_alloc();
	_dispatch_continuation_init(dsn, dq, db, 0, DC_FLAG_CONSUME);
	_dispatch_group_notify(dg, dq, dsn);
}

DISPATCH_ALWAYS_INLINE
static inline void
_dispatch_group_notify(dispatch_group_t dg, dispatch_queue_t dq,
		dispatch_continuation_t dsn)
{
	uint64_t old_state, new_state;
	dispatch_continuation_t prev;

	dsn->dc_data = dq;
	_dispatch_retain(dq);

	prev = os_mpsc_push_update_tail(os_mpsc(dg, dg_notify), dsn, do_next);
	if (os_mpsc_push_was_empty(prev)) _dispatch_retain(dg);
	os_mpsc_push_update_prev(os_mpsc(dg, dg_notify), prev, dsn, do_next);
	if (os_mpsc_push_was_empty(prev)) {
		os_atomic_rmw_loop2o(dg, dg_state, old_state, new_state, release, {
			new_state = old_state | DISPATCH_GROUP_HAS_NOTIFS;
			if ((uint32_t)old_state == 0) {
				os_atomic_rmw_loop_give_up({
					return _dispatch_group_wake(dg, new_state, false);
				});
			}
		});
	}
}

最后又走到_dispatch_group_wake这个函数

dispatch_group_enter和dispatch_group_leave是成对出现的，如果dispatch_group_leave调用之后value为0，则调用_dispatch_group_wake函数

参考文章

GCD源码分析（一）
GCD源码分析（二）
GCD源码吐血分析(2)——dispatch_async/dispatch_sync/dispatch_once/dispatch group