go channel 源码剖析

201 阅读9分钟

channel实现的源码在官方的runtime/chan.go下面,加注释一共700+行代码。下面来看它的实现,channel 的底层结构是:

// hchan is the runtime representation of a channel.
type hchan struct {
	qcount   uint           // number of elements currently held in the channel buffer
	dataqsiz uint           // capacity of the circular buffer; 0 for an unbuffered channel
	buf      unsafe.Pointer // pointer to the buffer that holds the queued elements
	elemsize uint16  // size of one element sent or received on the channel
	closed   uint32  // non-zero once the channel has been closed
	elemtype *_type // type of the elements sent or received on the channel
	sendx    uint   // buffer index where the next sent element is stored
	recvx    uint   // buffer index from which the next element is received
	recvq    waitq  // list of goroutines waiting to receive
	sendq    waitq  // list of goroutines waiting to send

	// lock protects all fields in hchan, as well as several
	// fields in sudogs blocked on this channel.
	//
	// Do not change another G's status while holding this lock
	// (in particular, do not ready a G), as this can deadlock
	// with stack shrinking.
	lock mutex // the mutex described above
}

Go 语言提供了一种不同的并发模型,即通信顺序进程(Communicating sequential processes,CSP)。Goroutine 和 Channel 分别对应 CSP 中的实体和传递信息的媒介,Goroutine 之间会通过 Channel 传递数据。

makechan

通常我们会先初始化一个channel,然后往里面塞数据,用另一个协程接收channel中的数据

c := make(chan int,0)
go func(){
		c <- 1
	}()
fmt.Println(<-c)

make方法会在编译之后转换成 runtime.makechan 或者 runtime.makechan64 函数,根据传入的参数类型和缓冲区大小创建一个新的 Channel 结构。其中后者用于处理缓冲区大小大于 2 的 32 次方的情况,因为这在 Channel 中并不常见,所以我们重点关注 runtime.makechan。

// makechan allocates and initializes an hchan whose element type is t.elem
// and whose buffer holds size elements.
//
// It throws if the element size does not fit in 16 bits or the alignment
// requirements are not met, and panics with "makechan: size out of range"
// if size is negative or the buffer size overflows or exceeds the
// allocation limit.
func makechan(t *chantype, size int) *hchan {
	elem := t.elem

	// compiler checks this but be safe. The checks below only validate the arguments.
	if elem.size >= 1<<16 {
		throw("makechan: invalid channel element type")
	}
	if hchanSize%maxAlign != 0 || elem.align > maxAlign {
		throw("makechan: bad alignment")
	}

	// mem is the number of bytes needed for the element buffer.
	mem, overflow := math.MulUintptr(elem.size, uintptr(size))
	if overflow || mem > maxAlloc-hchanSize || size < 0 {
		panic(plainError("makechan: size out of range"))
	}

	// Hchan does not contain pointers interesting for GC when elements stored in buf do not contain pointers.
	// buf points into the same allocation, elemtype is persistent.
	// SudoG's are referenced from their owning thread so they can't be collected.
	// TODO(dvyukov,rlh): Rethink when collector can move allocated objects.
	var c *hchan
	switch {
	case mem == 0:
		// Queue or element size is zero. No buffer is needed, so only
		// the hchan header itself is allocated.
		c = (*hchan)(mallocgc(hchanSize, nil, true))
		// Race detector uses this location for synchronization.
		c.buf = c.raceaddr()
	case elem.ptrdata == 0:
		// Elements do not contain pointers.
		// Allocate hchan and buf in one call: the header and the buffer
		// share one contiguous allocation, with buf placed right after
		// the header.
		c = (*hchan)(mallocgc(hchanSize+mem, nil, true))
		c.buf = add(unsafe.Pointer(c), hchanSize)
	default:
		// Elements contain pointers. The hchan header and the buffer are
		// allocated separately.
		c = new(hchan)
		c.buf = mallocgc(mem, elem, true)
	}

	c.elemsize = uint16(elem.size)
	c.elemtype = elem
	c.dataqsiz = uint(size)

	if debugChan {
		print("makechan: chan=", c, "; elemsize=", elem.size, "; dataqsiz=", size, "\n")
	}
	return c
}

发送数据

我们在调用时将 block 参数设置成 true,那么表示当前发送操作是阻塞的

// chansend sends the value pointed to by ep on channel c.
//
// If block is false the call never parks: it returns false when the send
// cannot complete immediately. Otherwise it returns true once the value has
// been handed to a waiting receiver, stored in the buffer, or taken by a
// receiver after this goroutine was parked. Sending on a closed channel
// panics with "send on closed channel".
func chansend(c *hchan, ep unsafe.Pointer, block bool, callerpc uintptr) bool {
	// Sending on a nil channel.
	if c == nil {
		// Non-blocking send: report failure.
		if !block {
			return false
		}
		// Blocking send: park the goroutine. No error is returned; control
		// is not expected to come back, hence the throw below.
		gopark(nil, nil, waitReasonChanSendNilChan, traceEvGoStop, 2)
		throw("unreachable")
	}

	if debugChan {
		print("chansend: chan=", c, "\n")
	}

	if raceenabled {
		racereadpc(c.raceaddr(), callerpc, funcPC(chansend))
	}

	// Fast path: check for failed non-blocking operation without acquiring the lock.
	//
	// After observing that the channel is not closed, we observe that the channel is
	// not ready for sending. Each of these observations is a single word-sized read
	// (first c.closed and second c.recvq.first or c.qcount depending on kind of channel).
	// Because a closed channel cannot transition from 'ready for sending' to
	// 'not ready for sending', even if the channel is closed between the two observations,
	// they imply a moment between the two when the channel was both not yet closed
	// and not ready for sending. We behave as if we observed the channel at that moment,
	// and report that the send cannot proceed.
	//
	// It is okay if the reads are reordered here: if we observe that the channel is not
	// ready for sending and then observe that it is not closed, that implies that the
	// channel wasn't closed during the first observation.
	//
	// In short: non-blocking && not closed && ((unbuffered && no waiting
	// receiver) || (buffered && buffer full)) => the send fails. In the
	// full-buffer case a blocking sender would park and wait for a free
	// slot, but a non-blocking sender may not wait, so it reports failure.
	if !block && c.closed == 0 && ((c.dataqsiz == 0 && c.recvq.first == nil) ||
		(c.dataqsiz > 0 && c.qcount == c.dataqsiz)) {
		return false
	}

	var t0 int64
	if blockprofilerate > 0 {
		t0 = cputicks()
	}

	// Take the channel lock before touching channel state.
	lock(&c.lock)

	// The channel is already closed: unlock and panic.
	if c.closed != 0 {
		unlock(&c.lock)
		panic(plainError("send on closed channel"))
	}
	// Case 1: a receiver is already waiting.
	if sg := c.recvq.dequeue(); sg != nil {
		// Found a waiting receiver. We pass the value we want to send
		// directly to the receiver, bypassing the channel buffer (if any).
		send(c, sg, ep, func() { unlock(&c.lock) }, 3)
		return true
	}
	// Case 2: no receiver is waiting and the buffer still has room.
	if c.qcount < c.dataqsiz {
		// Space is available in the channel buffer. Enqueue the element to send.
		// qp is the address of the slot to write to.
		qp := chanbuf(c, c.sendx)
		if raceenabled {
			raceacquire(qp)
			racerelease(qp)
		}
		// Copy the value into the buffer slot.
		typedmemmove(c.elemtype, qp, ep)
		// Advance the send index: the current slot is now occupied.
		c.sendx++
		// The buffer is circular: wrap around to 0 at the end.
		if c.sendx == c.dataqsiz {
			c.sendx = 0
		}
		// One more element is buffered.
		c.qcount++
		unlock(&c.lock)
		return true
	}

	if !block {
		unlock(&c.lock)
		return false
	}

	// Block on the channel. Some receiver will complete our operation for us.
	// Case 3: no receiver is waiting and there is no free buffer slot, so
	// this goroutine is queued on sendq.
	gp := getg()
	// Record the sending goroutine and the value being sent in a sudog.
	mysg := acquireSudog()
	mysg.releasetime = 0
	if t0 != 0 {
		mysg.releasetime = -1
	}
	// No stack splits between assigning elem and enqueuing mysg
	// on gp.waiting where copystack can find it.
	mysg.elem = ep
	mysg.waitlink = nil
	mysg.g = gp
	mysg.isSelect = false
	mysg.c = c
	gp.waiting = mysg
	gp.param = nil
	c.sendq.enqueue(mysg)
	// Park the goroutine.
	gopark(chanparkcommit, unsafe.Pointer(&c.lock), waitReasonChanSend, traceEvGoBlockSend, 2)
	// Ensure the value being sent is kept alive until the
	// receiver copies it out. The sudog has a pointer to the
	// stack object, but sudogs aren't considered as roots of the
	// stack tracer.
	KeepAlive(ep)

	// someone woke us up.
	if mysg != gp.waiting {
		throw("G waiting list is corrupted")
	}
	gp.waiting = nil
	gp.activeStackChans = false
	// A nil param means no receiver completed the send; the only valid
	// reason is that the channel was closed while this goroutine was parked.
	if gp.param == nil {
		if c.closed == 0 {
			throw("chansend: spurious wakeup")
		}
		panic(plainError("send on closed channel"))
	}
	gp.param = nil
	if mysg.releasetime > 0 {
		blockevent(mysg.releasetime-t0, 2)
	}
	mysg.c = nil
	releaseSudog(mysg)
	return true
}

直接发送给等待的协程

// send completes a send on channel c to the waiting receiver sg.
// The value at ep is copied directly to the receiver when the receiver
// supplied a destination (sg.elem != nil). unlockf is then called to release
// the channel, and the receiving goroutine is made runnable with goready.
// skip is passed on (plus one) to goready.
func send(c *hchan, sg *sudog, ep unsafe.Pointer, unlockf func(), skip int) {
	if raceenabled {
		if c.dataqsiz == 0 {
			racesync(c, sg)
		} else {
			// Pretend we go through the buffer, even though
			// we copy directly. Note that we need to increment
			// the head/tail locations only when raceenabled.
			qp := chanbuf(c, c.recvx)
			raceacquire(qp)
			racerelease(qp)
			raceacquireg(sg.g, qp)
			racereleaseg(sg.g, qp)
			c.recvx++
			if c.recvx == c.dataqsiz {
				c.recvx = 0
			}
			c.sendx = c.recvx // c.sendx = (c.sendx+1) % c.dataqsiz
		}
	}
	if sg.elem != nil {
		// Step 1: copy the value straight to the waiting receiver.
		sendDirect(c.elemtype, sg, ep)
		sg.elem = nil
	}
	gp := sg.g
	unlockf()
	// A non-nil param tells the woken receiver that a value was delivered.
	gp.param = unsafe.Pointer(sg)
	if sg.releasetime != 0 {
		sg.releasetime = cputicks()
	}
	// Step 2: make the waiting receiver runnable.
	// NOTE(review): the original annotation states that goready marks the
	// goroutine Grunnable and places it in the sender's P runnext slot, so it
	// runs at the next scheduling point and not immediately. goready is not
	// shown here; confirm against runtime/proc.go.
	goready(gp, skip+1)
}

接收数据

// chanrecv receives on channel c and writes the received value to ep.
// ep may be nil, in which case the received value is discarded.
//
// Results (selected, received):
//   - (false, false): non-blocking receive and nothing was available.
//   - (true, false): the channel is closed and empty; *ep is zeroed if ep is non-nil.
//   - (true, true): a value was received.
func chanrecv(c *hchan, ep unsafe.Pointer, block bool) (selected, received bool) {
	// raceenabled: don't need to check ep, as it is always on the stack
	// or is new memory allocated by reflect.

	if debugChan {
		print("chanrecv: chan=", c, "\n")
	}

	// Receiving from a nil channel: a non-blocking receive returns
	// (false, false); a blocking receive parks the goroutine and is not
	// expected to resume, hence the throw.
	if c == nil {
		if !block {
			return
		}

		gopark(nil, nil, waitReasonChanReceiveNilChan, traceEvGoStop, 2)
		throw("unreachable")
	}

	// Fast path: check for failed non-blocking operation without acquiring the lock.
	//
	// After observing that the channel is not ready for receiving, we observe that the
	// channel is not closed. Each of these observations is a single word-sized read
	// (first c.sendq.first or c.qcount, and second c.closed).
	// Because a channel cannot be reopened, the later observation of the channel
	// being not closed implies that it was also not closed at the moment of the
	// first observation. We behave as if we observed the channel at that moment
	// and report that the receive cannot proceed.
	//
	// The order of operations is important here: reversing the operations can lead to
	// incorrect behavior when racing with a close.
	if !block && (c.dataqsiz == 0 && c.sendq.first == nil ||
		c.dataqsiz > 0 && atomic.Loaduint(&c.qcount) == 0) &&
		atomic.Load(&c.closed) == 0 {
		return
	}

	var t0 int64
	if blockprofilerate > 0 {
		t0 = cputicks()
	}
	// Take the channel lock before touching channel state.
	lock(&c.lock)

	// Closed and no buffered data left: zero the destination and return
	// immediately.
	if c.closed != 0 && c.qcount == 0 {
		if raceenabled {
			raceacquire(c.raceaddr())
		}
		unlock(&c.lock)
		if ep != nil {
			typedmemclr(c.elemtype, ep)
		}
		return true, false
	}
	// Case 1: a sender is waiting.
	if sg := c.sendq.dequeue(); sg != nil {
		// Found a waiting sender. If buffer is size 0, receive value
		// directly from sender. Otherwise, receive from head of queue
		// and add sender's value to the tail of the queue (both map to
		// the same buffer slot because the queue is full).
		recv(c, sg, ep, func() { unlock(&c.lock) }, 3)
		return true, true
	}
	// Case 2: the buffer holds data to receive.
	if c.qcount > 0 {
		// Receive directly from queue
		qp := chanbuf(c, c.recvx)
		if raceenabled {
			raceacquire(qp)
			racerelease(qp)
		}
		if ep != nil {
			typedmemmove(c.elemtype, ep, qp)
		}
		typedmemclr(c.elemtype, qp)
		// Advance the receive index: the current slot has been consumed.
		c.recvx++
		if c.recvx == c.dataqsiz {
			c.recvx = 0
		}
		c.qcount--
		unlock(&c.lock)
		return true, true
	}

	if !block {
		unlock(&c.lock)
		return false, false
	}
	// Case 3: nothing is buffered and no sender is waiting; park the
	// receiver until a sender (or close) wakes it.
	// no sender available: block on this channel.
	gp := getg()
	// Record the receiving goroutine and its destination in a sudog.
	mysg := acquireSudog()
	mysg.releasetime = 0
	if t0 != 0 {
		mysg.releasetime = -1
	}
	// No stack splits between assigning elem and enqueuing mysg
	// on gp.waiting where copystack can find it.
	mysg.elem = ep
	mysg.waitlink = nil
	gp.waiting = mysg
	mysg.g = gp
	mysg.isSelect = false
	mysg.c = c
	gp.param = nil
	c.recvq.enqueue(mysg)
	gopark(chanparkcommit, unsafe.Pointer(&c.lock), waitReasonChanReceive, traceEvGoBlockRecv, 2)

	// someone woke us up
	if mysg != gp.waiting {
		throw("G waiting list is corrupted")
	}
	gp.waiting = nil
	gp.activeStackChans = false
	if mysg.releasetime > 0 {
		blockevent(mysg.releasetime-t0, 2)
	}
	// A nil param means no sender delivered a value, which is treated as
	// a wakeup caused by close.
	closed := gp.param == nil
	gp.param = nil
	mysg.c = nil
	releaseSudog(mysg)
	return true, !closed
}

情况1:有发送协程在等待的时候,调用了recv方法

// recv completes a receive on channel c from the waiting sender sg.
// For an unbuffered channel the value is copied directly from the sender to
// ep. For a buffered channel the value at the head of the buffer is copied
// to ep and the sender's value is stored in the slot that was just read.
// ep may be nil, in which case no value is copied out. unlockf is then
// called to release the channel and the sender is made runnable with
// goready. skip is passed on (plus one) to goready.
func recv(c *hchan, sg *sudog, ep unsafe.Pointer, unlockf func(), skip int) {
	// Unbuffered channel.
	if c.dataqsiz == 0 {
		if raceenabled {
			racesync(c, sg)
		}
		if ep != nil {
			// copy data from sender
			// The value goes straight from the parked sender to the receiver.
			recvDirect(c.elemtype, sg, ep)
		}
	} else {
		// Queue is full. Take the item at the
		// head of the queue. Make the sender enqueue
		// its item at the tail of the queue. Since the
		// queue is full, those are both the same slot.
		qp := chanbuf(c, c.recvx)
		if raceenabled {
			raceacquire(qp)
			racerelease(qp)
			raceacquireg(sg.g, qp)
			racereleaseg(sg.g, qp)
		}
		// copy data from queue to receiver
		if ep != nil {
			// Hand the buffered value at the head of the queue to the receiver.
			typedmemmove(c.elemtype, ep, qp)
		}
		// copy data from sender to queue
		// The slot just read is free again; store the parked sender's value in it.
		typedmemmove(c.elemtype, qp, sg.elem)
		// The refilled slot is now logically the tail of the queue, so the
		// next element to receive is in the following slot.
		c.recvx++
		// The buffer is circular: wrap around to 0 at the end.
		if c.recvx == c.dataqsiz {
			c.recvx = 0
		}
		// The buffer is still full (one element taken, one added), so the
		// send index must coincide with the receive index: when a later
		// receive frees that slot, sendx already points at it.
		c.sendx = c.recvx // c.sendx = (c.sendx+1) % c.dataqsiz
	}
	sg.elem = nil
	gp := sg.g
	unlockf()
	// A non-nil param tells the woken sender that its value was received.
	gp.param = unsafe.Pointer(sg)
	if sg.releasetime != 0 {
		sg.releasetime = cputicks()
	}
	// Make the parked sender runnable again.
	goready(gp, skip+1)
}

关闭通道

// closechan closes channel c.
//
// It panics with "close of nil channel" if c is nil and with
// "close of closed channel" if c is already closed. Otherwise it marks the
// channel closed, dequeues every goroutine waiting on recvq and sendq while
// holding the channel lock, and readies them after the lock is released.
func closechan(c *hchan) {
	// Closing a nil channel panics.
	if c == nil {
		panic(plainError("close of nil channel"))
	}

	lock(&c.lock)
	// Closing an already closed channel panics.
	if c.closed != 0 {
		unlock(&c.lock)
		panic(plainError("close of closed channel"))
	}

	if raceenabled {
		callerpc := getcallerpc()
		racewritepc(c.raceaddr(), callerpc, funcPC(closechan))
		racerelease(c.raceaddr())
	}

	c.closed = 1

	// glist collects the goroutines to wake once the lock is dropped.
	var glist gList

	// release all readers
	for {
		sg := c.recvq.dequeue()
		if sg == nil {
			break
		}
		// Zero the receiver's destination, since no value will be delivered.
		if sg.elem != nil {
			typedmemclr(c.elemtype, sg.elem)
			sg.elem = nil
		}
		if sg.releasetime != 0 {
			sg.releasetime = cputicks()
		}
		gp := sg.g
		// A nil param is how chanrecv recognizes a wakeup caused by close.
		gp.param = nil
		if raceenabled {
			raceacquireg(gp, c.raceaddr())
		}
		glist.push(gp)
	}

	// release all writers (they will panic)
	for {
		sg := c.sendq.dequeue()
		if sg == nil {
			break
		}
		sg.elem = nil
		if sg.releasetime != 0 {
			sg.releasetime = cputicks()
		}
		gp := sg.g
		// A nil param makes the woken chansend panic with
		// "send on closed channel".
		gp.param = nil
		if raceenabled {
			raceacquireg(gp, c.raceaddr())
		}
		glist.push(gp)
	}
	unlock(&c.lock)

	// Ready all Gs now that we've dropped the channel lock.
	for !glist.empty() {
		gp := glist.pop()
		gp.schedlink = 0
		goready(gp, 3)
	}
}

往一个nil的channel里面发送、接收数据都不会成功:阻塞模式下协程会被永久挂起(gopark),非阻塞模式下直接返回失败

image.png

image.png

问题 为啥不会panic ,注释去掉就panic

image.png

一年后追加:我认为不panic是因为main主协程已经结束,所以协程里面的报错还没来得及走到,就跟着主协程一起结束了。去掉注释后,又多执行了一行代码,拖延了一点时间,所以协程里面的发送数据执行到了,于是panic,跟最后一行代码里面具体是什么没有多大关系。具体证据如下:如果最后再hang住几秒,就会给协程足够的时间执行到报panic,所以是go func里面的panic,不是最后一行代码导致的。从一个关闭的通道读取数据不会panic,发送数据会。

image.png

另外在网上看到的,close通道的时候发送数据协程队列底层源码解析说了这句话

image.png

验证了一下:两个协程给一个只有1个元素缓冲的通道塞数据,其中一个会进入发送数据协程等待队列里面,关闭通道之后,结果是两个协程随机地panic,j jo随机出现

image.png

参考 zhuanlan.zhihu.com/p/62391727