golang select 源码解析

1,380 阅读4分钟

golang select 源码解析

背景

golang 中主推 channel 通信。单个 channel 的通信可以通过一个 goroutine 向 channel 发数据,另外一个 goroutine 从 channel 取数据来进行。这是阻塞的,因为要想顺利执行完这个步骤,需要 channel 准备好才行,准备好的条件如下:

  1. 发送
    • 缓存有空间(如果是有缓存的 channel)
    • 有等待接收的 goroutine
  2. 接收
    • 缓存有数据(如果是有缓存的 channel)
    • 有等待发送的 goroutine

channel 在实际使用中还有如下两个需求,这个时候就需要 select 了。

  1. 同时监听多个channel
  2. 在没有channel准备好的时候,也可以往下执行

select 流程

准备工作

  1. 当一个 select 中没有任何 case 的时候,会阻塞当前 goroutine。不要用 for{} 来阻塞 goroutine,因为会占用 cpu。而 select{} 不会,因为当前 goroutine 会放弃执行权限,也不会被放到任何待执行的任务队列中。

    // A select with no cases can never proceed: park the current
    // goroutine forever instead of spinning.
    if len(cases) == 0 {
      block()
    }
    
  2. 配置好 poll 的顺序。由于是同时监听多个 channel 的发送或者接收,所以需要按照一定的顺序查看哪个 channel 准备好了。如果每次都按照 select 中 case 的书写顺序查看 channel 是否准备好了,那么只要前面的 channel 准备好得足够快,就会造成后面的 channel 即使准备好了也永远不会被执行。所以需要打乱遍历的顺序。打乱顺序的逻辑如下,此过程采用了洗牌算法;注意此过程中会过滤掉 channel 为 nil 的 case。

    // generate permuted order
    // Inside-out Fisher–Yates shuffle: each case that has a non-nil
    // channel is inserted at a random slot among the cases collected so
    // far, producing a uniformly random poll order so that no case is
    // systematically favored.
    norder := 0
    for i := range scases {
      cas := &scases[i]
    
      // Omit cases without channels from the poll and lock orders.
      if cas.c == nil {
        cas.elem = nil // allow GC
        continue
      }
    
      // Pick a random slot j in [0, norder]; move its current occupant
      // to the end and place case i at j.
      j := fastrandn(uint32(norder + 1))
      pollorder[norder] = pollorder[j]
      pollorder[j] = uint16(i)
      norder++
    }
    
  3. 配置好lock的顺序。由于可能会修改channel中的数据,所以在打算往channel中发送数据或者从channel接收数据的时候,需要锁住 channel。而一个channel可能被多个select监听,如果两个select对两个channel A和B分别按照顺序A, B和B,A上锁是可能会造成死锁的,导致两个select都永远执行不下去。

    截屏2022-12-30 下午3.03.05.png

    所以select中锁住channel的顺序至关重要,解决方案是按照channel的地址的顺序锁住channel。因为在两个selectchannel有交集的时候,都是按照交集中channel的地址顺序锁channel。实际排序代码如下,采用堆排序算法按照channel的地址从小到大对channel进行排序。

    // sort the cases by Hchan address to get the locking order.
    // simple heap sort, to guarantee n log n time and constant stack footprint.
    // Locking channels in a single global order (their addresses) is what
    // prevents deadlock between two selects that share channels.
    //
    // Phase 1: build a max-heap in lockorder by sifting each element up.
    for i := range lockorder {
      j := i
      // Start with the pollorder to permute cases on the same channel.
      c := scases[pollorder[i]].c
      // Sift up while the parent's channel address is smaller.
      for j > 0 && scases[lockorder[(j-1)/2]].c.sortkey() < c.sortkey() {
        k := (j - 1) / 2
        lockorder[j] = lockorder[k]
        j = k
      }
      lockorder[j] = pollorder[i]
    }
    // Phase 2: repeatedly swap the max (root) to the end of the shrinking
    // heap and sift the displaced element down, yielding ascending order
    // of channel address.
    for i := len(lockorder) - 1; i >= 0; i-- {
      o := lockorder[i]
      c := scases[o].c
      lockorder[i] = lockorder[0]
      j := 0
      for {
        k := j*2 + 1
        if k >= i {
          break
        }
        // Choose the larger of the two children.
        if k+1 < i && scases[lockorder[k]].c.sortkey() < scases[lockorder[k+1]].c.sortkey() {
          k++
        }
        if c.sortkey() < scases[lockorder[k]].c.sortkey() {
          lockorder[j] = lockorder[k]
          j = k
          continue
        }
        break
      }
      lockorder[j] = o
    }
    

第一轮

  1. 第一轮查看是否已有准备好的 channel。注意 select 的 case 切片中,前面部分是往 channel 发送数据的 case,后半部分是从 channel 接收数据的 case。

    截屏2022-12-30 下午4.06.39.png

    按照pollorder顺序查看是否有channel准备好了。

    // Pass 1: walk the cases in randomized poll order looking for one
    // that can proceed immediately. Case indices in [0, nsends) are
    // sends; indices >= nsends are receives.
    for _, casei := range pollorder {
      casi = int(casei)
      cas = &scases[casi]
      c = cas.c
    
      if casi >= nsends {
        // Receive case: try a parked sender first, then buffered
        // data, then the closed state.
        sg = c.sendq.dequeue()
        if sg != nil {
          goto recv
        }
        if c.qcount > 0 {
          goto bufrecv
        }
        if c.closed != 0 {
          goto rclose
        }
      } else {
        // Send case: sending on a closed channel panics, so the
        // closed check comes first; otherwise try a parked receiver,
        // then free buffer space.
        if raceenabled {
          racereadpc(c.raceaddr(), casePC(casi), chansendpc)
        }
        if c.closed != 0 {
          goto sclose
        }
        sg = c.recvq.dequeue()
        if sg != nil {
          goto send
        }
        if c.qcount < c.dataqsiz {
          goto bufsend
        }
      }
    }
    
  2. send场景

    // Send case, checked in order: closed channel (panics), parked
    // receiver (direct handoff), free buffer slot (buffered send).
    if c.closed != 0 {
      goto sclose
    }
    sg = c.recvq.dequeue()
    if sg != nil {
      goto send
    }
    if c.qcount < c.dataqsiz {
      goto bufsend
    }
    
    • channel已经close,直接panic

      sclose:
      	// send on closed channel
      	// Release every channel lock taken for this select before
      	// panicking, so other goroutines are not blocked forever.
      	selunlock(scases, lockorder)
      	panic(plainError("send on closed channel"))
      
    • 有阻塞等待读取的goroutine。对于有缓存的channel来说缓存是空的,因为有goroutine阻塞在从这个channel读取数据上。

      send:
      	// can send to a sleeping receiver (sg)
      	// Sanitizer bookkeeping: report the read of the element being
      	// sent before handing it off.
      	if raceenabled {
      		raceReadObjectPC(c.elemtype, cas.elem, casePC(casi), chansendpc)
      	}
      	if msanenabled {
      		msanread(cas.elem, c.elemtype.size)
      	}
      	if asanenabled {
      		asanread(cas.elem, c.elemtype.size)
      	}
      	// Copy the value directly to the receiver and wake it; the
      	// callback unlocks all select-held channel locks.
      	send(c, sg, cas.elem, func() { selunlock(scases, lockorder) }, 2)
      	if debugSelect {
      		print("syncsend: cas0=", cas0, " c=", c, "\n")
      	}
      	goto retc
      
    • 缓存未满,把数据放到缓存中。

      bufsend:
      	// can send to buffer
      	if raceenabled {
      		racenotify(c, c.sendx, nil)
      		raceReadObjectPC(c.elemtype, cas.elem, casePC(casi), chansendpc)
      	}
      	if msanenabled {
      		msanread(cas.elem, c.elemtype.size)
      	}
      	if asanenabled {
      		asanread(cas.elem, c.elemtype.size)
      	}
      	// Copy the element into the ring buffer at the send index,
      	// advance the index with wraparound, and bump the count.
      	typedmemmove(c.elemtype, chanbuf(c, c.sendx), cas.elem)
      	c.sendx++
      	if c.sendx == c.dataqsiz {
      		c.sendx = 0
      	}
      	c.qcount++
      	selunlock(scases, lockorder)
      	goto retc
      
  3. recv场景

    // Receive case, checked in order: parked sender (direct handoff),
    // buffered data, closed channel (yields zero value, no panic).
    sg = c.sendq.dequeue()
    if sg != nil {
      goto recv
    }
    if c.qcount > 0 {
      goto bufrecv
    }
    if c.closed != 0 {
      goto rclose
    }
    
    • 有阻塞发送的goroutine

      recv:
      	// can receive from sleeping sender (sg)
      	recv(c, sg, cas.elem, func() { selunlock(scases, lockorder) }, 2) // If the buffer is full, the current goroutine first takes a value from the buffer, then the sender's value is placed into the buffer.
      	if debugSelect {
      		print("syncrecv: cas0=", cas0, " c=", c, "\n")
      	}
      	recvOK = true
      	goto retc
      
    • 缓存有数据

      bufrecv:
      	// can receive from buffer
      	// cas.elem may be nil when the received value is discarded
      	// (e.g. `case <-ch:`), hence the nil checks below.
      	if raceenabled {
      		if cas.elem != nil {
      			raceWriteObjectPC(c.elemtype, cas.elem, casePC(casi), chanrecvpc)
      		}
      		racenotify(c, c.recvx, nil)
      	}
      	if msanenabled && cas.elem != nil {
      		msanwrite(cas.elem, c.elemtype.size)
      	}
      	if asanenabled && cas.elem != nil {
      		asanwrite(cas.elem, c.elemtype.size)
      	}
      	recvOK = true
      	// Copy the element out of the ring buffer, clear the slot for
      	// GC, advance the receive index with wraparound, drop the count.
      	qp = chanbuf(c, c.recvx)
      	if cas.elem != nil {
      		typedmemmove(c.elemtype, cas.elem, qp)
      	}
      	typedmemclr(c.elemtype, qp)
      	c.recvx++
      	if c.recvx == c.dataqsiz {
      		c.recvx = 0
      	}
      	c.qcount--
      	selunlock(scases, lockorder)
      	goto retc
      
    • channel 已经关闭。与发送不同,这里不会 panic:返回对应元素类型的零值,并把 recvOK 置为 false。

      rclose:
      	// read at end of closed channel
      	// Unlike send-on-closed, this does not panic: the destination
      	// is zeroed (the element type's zero value) and recvOK is false.
      	selunlock(scases, lockorder)
      	recvOK = false
      	if cas.elem != nil {
      		typedmemclr(c.elemtype, cas.elem)
      	}
      	if raceenabled {
      		raceacquire(c.raceaddr())
      	}
      	goto retc
      

非阻塞

对于有select中有default分支的,是非阻塞场景。由于没有channel准备好数据,所以直接执行default分支。

// A select with a default clause is non-blocking (!block): nothing was
// ready in pass 1, so unlock everything and return casi = -1, which the
// caller maps to the default branch.
if !block {
  selunlock(scases, lockorder)
  casi = -1
  goto retc
}

第二轮

阻塞当前goroutine,等待被唤醒。

  1. 创建sudog放到每个channel的等待链表中去,等待channel准备好时被唤醒。
  2. 把第一步对应的 sudog 放到当前 goroutine 的 waiting 链表上去,用于阻塞当前 goroutine 的时候解锁对应的 channel。
// pass 2 - enqueue on all chans
// No case was ready: park a sudog on every channel's wait queue so any
// of them can wake this goroutine, and thread the sudogs onto
// gp.waiting (in lock order) for later cleanup.
gp = getg()
if gp.waiting != nil {
  throw("gp.waiting != nil")
}
nextp = &gp.waiting
for _, casei := range lockorder {
  casi = int(casei)
  cas = &scases[casi]
  c = cas.c
  sg := acquireSudog()
  sg.g = gp
  sg.isSelect = true
  // No stack splits between assigning elem and enqueuing
  // sg on gp.waiting where copystack can find it.
  sg.elem = cas.elem
  sg.releasetime = 0
  if t0 != 0 {
    sg.releasetime = -1
  }
  sg.c = c
  // Construct waiting list in lock order.
  *nextp = sg
  nextp = &sg.waitlink

  // Send cases wait in sendq, receive cases in recvq.
  if casi < nsends {
    c.sendq.enqueue(sg)
  } else {
    c.recvq.enqueue(sg)
  }
}

第三轮

此时当前 select 所在的 goroutine 被某个准备好的 channel 唤醒。

  1. 清理当前 goroutine 的 waiting 链表,是第二轮步骤 2 的反向操作。
  2. 找到是被哪个channel唤醒的;并清理在其他未被唤醒的channel上当前goroutine对应的等待sudog;因为当前select某个case已经满足条件了,不用再在其他case上阻塞了。
// Woken up: re-lock all channels before touching their wait queues.
sellock(scases, lockorder)

gp.selectDone = 0
// gp.param was set by the goroutine that woke us; it is the sudog of
// the case that fired (or nil).
sg = (*sudog)(gp.param)
gp.param = nil

// pass 3 - dequeue from unsuccessful chans
// otherwise they stack up on quiet channels
// record the successful case, if any.
// We singly-linked up the SudoGs in lock order.
casi = -1
cas = nil
caseSuccess = false
sglist = gp.waiting
// Clear all elem before unlinking from gp.waiting.
for sg1 := gp.waiting; sg1 != nil; sg1 = sg1.waitlink {
  sg1.isSelect = false
  sg1.elem = nil
  sg1.c = nil
}
gp.waiting = nil

// Walk the sudogs in the same lock order they were enqueued in:
// record the winning case, remove the rest from their wait queues.
for _, casei := range lockorder {
  k = &scases[casei]
  if sg == sglist {
    // sg has already been dequeued by the G that woke us up.
    casi = int(casei)
    cas = k
    caseSuccess = sglist.success
    if sglist.releasetime > 0 {
      caseReleaseTime = sglist.releasetime
    }
  } else {
    c = k.c
    if int(casei) < nsends {
      c.sendq.dequeueSudoG(sglist)
    } else {
      c.recvq.dequeueSudoG(sglist)
    }
  }
  sgnext = sglist.waitlink
  sglist.waitlink = nil
  releaseSudog(sglist)
  sglist = sgnext
}