golang netpoll源码分析

893 阅读8分钟

go version: 1.17

结构体

// TCPListener is the concrete net.Listener for TCP: it pairs the
// listening socket's netFD with the ListenConfig it was created from.
type TCPListener struct {
   fd *netFD
   lc ListenConfig
}
// Network file descriptor.
type netFD struct {
   pfd poll.FD     // platform poll.FD: the system fd plus runtime-poller state

   // immutable until Close
   family      int
   sotype      int
   isConnected bool // whether a connection has been established
   net         string
   laddr       Addr
   raddr       Addr
}
// FD is a file descriptor. The net and os packages use this type as a
// field of a larger type representing a network connection or OS file.
// FD is a file descriptor. The net and os packages use this type as a
// field of a larger type representing a network connection or OS file.
type FD struct {
   // Lock sysfd and serialize access to Read and Write methods.
   fdmu fdMutex

   // System file descriptor. Immutable until Close.
   Sysfd int  // the socket's OS-level file descriptor

   // I/O poller.
   pd pollDesc

   // Writev cache.
   iovecs *[]syscall.Iovec

   // Semaphore signaled when file is closed.
   csema uint32

   // Non-zero if this file has been set to blocking mode.
   isBlocking uint32

   // Whether this is a streaming descriptor, as opposed to a
   // packet-based descriptor like a UDP socket. Immutable.
   IsStream bool

   // Whether a zero byte read indicates EOF. This is false for a
   // message based socket connection.
   ZeroReadIsEOF bool

   // Whether this is a file rather than a network socket.
   isFile bool   // true for an OS file descriptor, false for a network connection
}
// pollDesc is the runtime-side state for one polled file descriptor.
type pollDesc struct {
   link *pollDesc // in pollcache, protected by pollcache.lock

   // The lock protects pollOpen, pollSetDeadline, pollUnblock and deadlineimpl operations.
   // This fully covers seq, rt and wt variables. fd is constant throughout the PollDesc lifetime.
   // pollReset, pollWait, pollWaitCanceled and runtime·netpollready (IO readiness notification)
   // proceed w/o taking the lock. So closing, everr, rg, rd, wg and wd are manipulated
   // in a lock-free way by all operations.
   // NOTE(dvyukov): the following code uses uintptr to store *g (rg/wg),
   // that will blow up when GC starts moving objects.
   lock    mutex // protects the following fields
   fd      uintptr
   closing bool
   everr   bool      // marks event scanning error happened
   user    uint32    // user settable cookie
   rseq    uintptr   // protects from stale read timers
   rg      uintptr   // pdReady, pdWait, or the g parked (via gopark) waiting to read
   rt      timer     // read deadline timer
   rd      int64     // read deadline (ns)
   wseq    uintptr   // protects from stale write timers
   wg      uintptr   // pdReady, pdWait, or the g parked (via gopark) waiting to write
   wt      timer     // write deadline timer
   wd      int64     // write deadline (ns); the original annotation said "read" — copy-paste slip
   self    *pollDesc // storage for indirect interface. See (*pollDesc).makeArg.
}

以上几个结构体的关系如下 image.png

listen

import (
   "fmt"
   "net"
)

// main runs a minimal TCP echo server: listen on :8080, accept
// connections in a loop, and hand each one to its own goroutine.
func main() {
   ln, err := net.Listen("tcp", ":8080")
   if err != nil {
      fmt.Println("listen error: ", err)
      return
   }

   for {
      c, err := ln.Accept()
      if err != nil {
         fmt.Println("accept error: ", err)
         break
      }

      go HandleConn(c)
   }
}
func HandleConn(conn net.Conn) {
   defer conn.Close()
   buf := make([]byte, 1024)
   for {
      _, _ = conn.Read(buf)
      _, _ = conn.Write(buf)
   }
}

listen的调用顺序

net.Listen

// Listen is the package-level convenience entry point: it delegates to
// the Listen method of a zero-value ListenConfig with a background
// context.
func Listen(network, address string) (Listener, error) {
   return new(ListenConfig).Listen(context.Background(), network, address)
}

ListenConfig.Listen

// Listen resolves address into a list of candidates and creates a
// listener for the first suitable one (IPv4 preferred).
// NOTE(review): this excerpt elides the non-TCP arms; the (...) stands
// in for the remaining cases and the switch's closing brace.
func (lc *ListenConfig) Listen(ctx context.Context, network, address string) (Listener, error) {
   addrs, err := DefaultResolver.resolveAddrList(ctx, "listen", network, address, nil)
   if err != nil {
      return nil, &OpError{Op: "listen", Net: network, Source: nil, Addr: nil, Err: err}
   }
   sl := &sysListener{
      ListenConfig: *lc,
      network:      network,
      address:      address,
   }
   var l Listener
   // prefer the first IPv4 address among the resolved candidates
   la := addrs.first(isIPv4)
   switch la := la.(type) {
   case *TCPAddr:
      l, err = sl.listenTCP(ctx, la)
      
   (...)
   return l, nil
}

sysListener.listenTCP

// listenTCP creates a TCP listening socket bound to laddr and wraps it
// in a *TCPListener.
func (sl *sysListener) listenTCP(ctx context.Context, laddr *TCPAddr) (*TCPListener, error) {
   // mode is "listen" here; the other mode, "dial", is used when a
   // client initiates an outgoing connection
   fd, err := internetSocket(ctx, sl.network, laddr, nil, syscall.SOCK_STREAM, 0, "listen", sl.ListenConfig.Control)
   if err != nil {
      return nil, err
   }
   return &TCPListener{fd: fd, lc: sl.ListenConfig}, nil
}

internetSocket

// internetSocket chooses the address family best suited to net, laddr,
// raddr, and mode, then creates the socket.
func internetSocket(ctx context.Context, net string, laddr, raddr sockaddr, sotype, proto int, mode string, ctrlFn func(string, string, syscall.RawConn) error) (fd *netFD, err error) {
   
   // these platforms cannot dial a wildcard remote address directly,
   // so rewrite it to the equivalent local address first
   if (runtime.GOOS == "aix" || runtime.GOOS == "windows" || runtime.GOOS == "openbsd") && mode == "dial" && raddr.isWildcard() {
      raddr = raddr.toLocal(net)
   }
   family, ipv6only := favoriteAddrFamily(net, laddr, raddr, mode)
   return socket(ctx, net, family, sotype, proto, ipv6only, laddr, raddr, ctrlFn)
}

socket

// socket creates a socket fd, applies default options, and wraps it in
// a netFD. With laddr set and raddr nil it becomes a listener (stream
// or datagram); otherwise it dials out. (Excerpt: (...) elides code.)
func socket(ctx context.Context, net string, family, sotype, proto int, ipv6only bool, laddr, raddr sockaddr, ctrlFn func(string, string, syscall.RawConn) error) (fd *netFD, err error) {
   // create the socket; returns the raw system file descriptor
   s, err := sysSocket(family, sotype, proto)
   if err != nil {
      return nil, err
   }
   if err = setDefaultSockopts(s, family, sotype, ipv6only); err != nil {
      poll.CloseFunc(s)
      return nil, err
   }
   // wrap the raw fd in a new netFD
   if fd, err = newFD(s, family, sotype, net); err != nil {
      poll.CloseFunc(s)
      return nil, err
   }

   (...)
   
   if laddr != nil && raddr == nil {
      switch sotype {
      // stream sockets (TCP): bind, listen, register with the poller
      case syscall.SOCK_STREAM, syscall.SOCK_SEQPACKET:
         if err := fd.listenStream(laddr, listenerBacklog(), ctrlFn); err != nil {
            fd.Close()
            return nil, err
         }
         return fd, nil
      // datagram sockets (UDP)
      case syscall.SOCK_DGRAM:
         if err := fd.listenDatagram(laddr, ctrlFn); err != nil {
            fd.Close()
            return nil, err
         }
         return fd, nil
      }
   }
   // reaching here means this is not a listener: a client-side dial
   if err := fd.dial(ctx, laddr, raddr, ctrlFn); err != nil {
      fd.Close()
      return nil, err
   }
   return fd, nil
}

// newFD wraps an already-created system socket descriptor in a netFD.
// Stream-ness and zero-read-is-EOF are derived from the socket type.
func newFD(sysfd, family, sotype int, net string) (*netFD, error) {
   fd := new(netFD)
   fd.pfd = poll.FD{
      Sysfd:         sysfd,
      IsStream:      sotype == syscall.SOCK_STREAM,
      ZeroReadIsEOF: sotype != syscall.SOCK_DGRAM && sotype != syscall.SOCK_RAW,
   }
   fd.family = family
   fd.sotype = sotype
   fd.net = net
   return fd, nil
}

listenStream

// listenStream binds fd to laddr, starts listening with the given
// backlog, and registers the fd with the runtime poller.
// (Excerpt: the (...) elides setup where err and lsa are declared.)
func (fd *netFD) listenStream(laddr sockaddr, backlog int, ctrlFn func(string, string, syscall.RawConn) error) error {
   (...)
   // bind(2): attach the socket to the local address
   if err = syscall.Bind(fd.pfd.Sysfd, lsa); err != nil {
      return os.NewSyscallError("bind", err)
   }
   // listen(2): mark the socket as accepting connections
   if err = listenFunc(fd.pfd.Sysfd, backlog); err != nil {
      return os.NewSyscallError("listen", err)
   }
   // register the fd with the runtime netpoller
   if err = fd.init(); err != nil {
      return err
   }
   lsa, _ = syscall.Getsockname(fd.pfd.Sysfd)
   fd.setAddr(fd.addrFunc()(lsa), nil)
   return nil
}

pollDesc.init

// init lazily starts the runtime network poller (exactly once per
// process, via sync.Once) and registers fd with it. The returned
// context is an opaque pointer to a runtime pollDesc, stored in
// pd.runtimeCtx.
func (pd *pollDesc) init(fd *FD) error {
   serverInit.Do(runtime_pollServerInit)
   ctx, errno := runtime_pollOpen(uintptr(fd.Sysfd))
   if errno != 0 {
      return errnoErr(syscall.Errno(errno))
   }
   pd.runtimeCtx = ctx
   return nil
}

runtime_pollServerInit方法只会被调用一次,

// poll_runtime_pollServerInit is the runtime side of
// poll.runtime_pollServerInit; the caller guards it with sync.Once.
func poll_runtime_pollServerInit() {
   netpollGenericInit()
}

// netpollGenericInit initializes the poller exactly once using a
// double-checked pattern: a cheap atomic load first, then the real
// check under netpollInitLock.
func netpollGenericInit() {
   if atomic.Load(&netpollInited) == 0 {
      lockInit(&netpollInitLock, lockRankNetpollInit)
      lock(&netpollInitLock)
      if netpollInited == 0 {
         // platform-specific setup (epoll creation on Linux)
         netpollinit()
         atomic.Store(&netpollInited, 1)
      }
      unlock(&netpollInitLock)
   }
}
var ( 
    // epfd is the process-wide epoll instance; -1 until netpollinit runs.
    epfd int32 = -1
)
// netpollinit creates the global epoll instance and registers the read
// end of a non-blocking pipe that is used to interrupt (break out of)
// a blocked epollwait.
func netpollinit() {
   // create the epoll file descriptor (close-on-exec)
   epfd = epollcreate1(_EPOLL_CLOEXEC)
   if epfd < 0 {
      // fallback for kernels without epoll_create1
      epfd = epollcreate(1024)
      if epfd < 0 {
         println("runtime: epollcreate failed with", -epfd)
         throw("runtime: netpollinit failed")
      }
      closeonexec(epfd)
   }
   // pipe whose read end is used to wake up a blocking epollwait
   r, w, errno := nonblockingPipe()
   if errno != 0 {
      println("runtime: pipe failed with", -errno)
      throw("runtime: pipe failed")
   }
   ev := epollevent{
      events: _EPOLLIN,
   }
   // tag the event data with &netpollBreakRd so netpoll can tell this
   // wake-up event apart from real socket readiness
   *(**uintptr)(unsafe.Pointer(&ev.data)) = &netpollBreakRd
   // add the pipe's read end to the epoll interest list
   errno = epollctl(epfd, _EPOLL_CTL_ADD, r, &ev)
   if errno != 0 {
      println("runtime: epollctl failed with", -errno)
      throw("runtime: epollctl failed")
   }
   netpollBreakRd = uintptr(r)
   netpollBreakWr = uintptr(w)
}
// poll_runtime_pollOpen takes a pollDesc off the cache's free list,
// resets its per-use fields, and registers fd with the platform poller
// via netpollopen.
func poll_runtime_pollOpen(fd uintptr) (*pollDesc, int) {
   pd := pollcache.alloc()
   lock(&pd.lock)
   // a recycled descriptor must not still hold a parked writer/reader
   if pd.wg != 0 && pd.wg != pdReady {
      throw("runtime: blocked write on free polldesc")
   }
   if pd.rg != 0 && pd.rg != pdReady {
      throw("runtime: blocked read on free polldesc")
   }
   pd.fd = fd
   pd.closing = false
   pd.everr = false
   pd.rseq++ // invalidates stale read timers from a previous owner
   pd.rg = 0
   pd.rd = 0
   pd.wseq++ // invalidates stale write timers from a previous owner
   pd.wg = 0
   pd.wd = 0
   pd.self = pd
   unlock(&pd.lock)

   errno := netpollopen(fd, pd)
   if errno != 0 {
      pollcache.free(pd)
      return nil, int(errno)
   }
   return pd, 0
}

poll_runtime_pollOpen会调用pollcache.alloc:当空闲链表为空时,一次性分配一块4KB的内存,切分成多个pollDesc结构体并串成空闲链表,再从链表头部取出一个返回

// Each pollCache allocation slab is 4KB.
const pollBlockSize = 4 * 1024

// pollCache is a free list of pollDesc structs allocated off-heap.
type pollCache struct {
   lock  mutex
   first *pollDesc
   // PollDesc objects must be type-stable,
   // because we can get ready notification from epoll/kqueue
   // after the descriptor is closed/reused.
   // Stale notifications are detected using seq variable,
   // seq is incremented when deadlines are changed or descriptor is reused.
}

// alloc pops one pollDesc off the free list, refilling the list first
// when it is empty: a single pollBlockSize (4KB) slab is carved into n
// descriptors and linked together. The memory comes from
// persistentalloc (never garbage-collected) because epoll keeps raw
// pointers into it.
func (c *pollCache) alloc() *pollDesc {
   lock(&c.lock)
   if c.first == nil {
      const pdSize = unsafe.Sizeof(pollDesc{})
      // number of pollDescs that fit in one 4KB slab
      n := pollBlockSize / pdSize
      if n == 0 {
         n = 1
      }
      // Must be in non-GC memory because can be referenced
      // only from epoll/kqueue internals.
      mem := persistentalloc(n*pdSize, 0, &memstats.other_sys)
      for i := uintptr(0); i < n; i++ {
         pd := (*pollDesc)(add(mem, i*pdSize))
         pd.link = c.first
         c.first = pd
      }
   }
   pd := c.first
   c.first = pd.link
   lockInit(&pd.lock, lockRankPollDesc)
   unlock(&c.lock)
   return pd
}

alloc就是预先分配一块共4KB的内存,切分成多个pollDesc结构体并把它们串成链表,将链表头first返回,最终在pollDesc.init中赋值给pd.runtimeCtx image.png

image.png 目前fd.pfd.Sysfd已经完成了绑定和监听,fd.pfd.pd也被初始化完成,接下来分析accept

accept

// Accept implements the net.Listener Accept method: it waits for the
// next TCP connection and wraps any failure in an *OpError carrying
// the listener's metadata.
func (l *TCPListener) Accept() (Conn, error) {
   if !l.ok() {
      return nil, syscall.EINVAL
   }
   conn, err := l.accept()
   if err == nil {
      return conn, nil
   }
   return nil, &OpError{Op: "accept", Net: l.fd.net, Source: nil, Addr: l.fd.laddr, Err: err}
}
// accept waits for the next connection, wraps it in a *TCPConn, and
// applies the listener's keep-alive settings to the new socket.
func (ln *TCPListener) accept() (*TCPConn, error) {
   fd, err := ln.fd.accept()
   if err != nil {
      return nil, err
   }
   tc := newTCPConn(fd)
   if ln.lc.KeepAlive >= 0 {
      setKeepAlive(fd, true)
      ka := ln.lc.KeepAlive
      // KeepAlive == 0 means "use the default period"
      if ln.lc.KeepAlive == 0 {
         ka = defaultTCPKeepAlive
      }
      setKeepAlivePeriod(fd, ka)
   }
   return tc, nil
}
// accept waits for and returns the next connection as a new netFD that
// has been registered with the runtime poller.
func (fd *netFD) accept() (netfd *netFD, err error) {
   // poll.FD.Accept waits (via the poller) for a new connection;
   // d is the accepted socket's system file descriptor
   d, rsa, errcall, err := fd.pfd.Accept()
   if err != nil {
      if errcall != "" {
         err = wrapSyscallError(errcall, err)
      }
      return nil, err
   }

   // wrap the accepted fd in a fresh netFD describing the new socket
   if netfd, err = newFD(d, fd.family, fd.sotype, fd.net); err != nil {
      poll.CloseFunc(d)
      return nil, err
   }
   // register the new fd with the runtime netpoller
   if err = netfd.init(); err != nil {
      netfd.Close()
      return nil, err
   }
   lsa, _ := syscall.Getsockname(netfd.pfd.Sysfd)
   netfd.setAddr(netfd.addrFunc()(lsa), netfd.addrFunc()(rsa))
   return netfd, nil
}
// Accept loops calling accept(2) on the non-blocking listening socket.
// On EAGAIN it parks the calling goroutine via pd.waitRead until the
// netpoller reports the fd readable, then retries.
func (fd *FD) Accept() (int, syscall.Sockaddr, string, error) {
   if err := fd.readLock(); err != nil {
      return -1, nil, "", err
   }
   defer fd.readUnlock()

   if err := fd.pd.prepareRead(fd.isFile); err != nil {
      return -1, nil, "", err
   }
   for {
      // accept(2); the listening fd is non-blocking, so this returns immediately
      s, rsa, errcall, err := accept(fd.Sysfd)
      // err == nil means a connection was accepted: return its fd
      if err == nil {
         return s, rsa, "", err
      }
      switch err {
      case syscall.EINTR:
         continue
      // err != nil: EAGAIN means nothing is ready to accept yet
      case syscall.EAGAIN:
         if fd.pd.pollable() {
            // park this goroutine via pollDesc.waitRead until readable
            if err = fd.pd.waitRead(fd.isFile); err == nil {
               continue
            }
         }
      case syscall.ECONNABORTED:
         // This means that a socket on the listen
         // queue was closed before we Accept()ed it;
         // it's a silly error, so try again.
         continue
      }
      return -1, nil, errcall, err
   }
}

netFD.accept会调用poll.FD.Accept,再调用Linux系统调用accept,并且把accept返回的新socket设置成非阻塞模式

// accept wraps the accept syscall (AcceptFunc), marking the accepted
// socket close-on-exec and non-blocking before handing it back.
// (Excerpt: the (...) elides platform-specific setup.)
func accept(s int) (int, syscall.Sockaddr, string, error) {
   (...)
   ns, sa, err := AcceptFunc(s)
   if err == nil {
      syscall.CloseOnExec(ns)
   }
   if err != nil {
      return -1, nil, "accept", err
   }
   // the accepted socket must be non-blocking so reads/writes rely on
   // the netpoller instead of blocking an OS thread
   if err = syscall.SetNonblock(ns, true); err != nil {
      CloseFunc(ns)
      return -1, nil, "setnonblock", err
   }
   return ns, sa, "", nil
}

image.png

read

// Read implements net.Conn's Read by delegating to the netFD and
// wrapping any non-EOF error with the connection's metadata.
func (c *conn) Read(b []byte) (int, error) {
   if !c.ok() {
      return 0, syscall.EINVAL
   }
   n, err := c.fd.Read(b)
   if err != nil && err != io.EOF {
      err = &OpError{Op: "read", Net: c.fd.net, Source: c.fd.laddr, Addr: c.fd.raddr, Err: err}
   }
   return n, err
}
// Read delegates to poll.FD.Read; runtime.KeepAlive keeps fd reachable
// until the call completes.
func (fd *netFD) Read(p []byte) (n int, err error) {
   n, err = fd.pfd.Read(p)
   runtime.KeepAlive(fd)
   return n, wrapSyscallError(readSyscallName, err)
}
// Read implements io.Reader.
func (fd *FD) Read(p []byte) (int, error) {
   if err := fd.readLock(); err != nil {
      return 0, err
   }
   defer fd.readUnlock()
   if len(p) == 0 {
      // If the caller wanted a zero byte read, return immediately
      // without trying (but after acquiring the readLock).
      // Otherwise syscall.Read returns 0, nil which looks like
      // io.EOF.
      // TODO(bradfitz): make it wait for readability? (Issue 15735)
      return 0, nil
   }
   if err := fd.pd.prepareRead(fd.isFile); err != nil {
      return 0, err
   }
   if fd.IsStream && len(p) > maxRW {
      p = p[:maxRW]
   }
   for {
      n, err := ignoringEINTRIO(syscall.Read, fd.Sysfd, p)
      if err != nil {
         n = 0
         // EAGAIN表示没有I/O事件发生,接下来通过goparkblock住
         if err == syscall.EAGAIN && fd.pd.pollable() {
            if err = fd.pd.waitRead(fd.isFile); err == nil {
               continue
            }
         }
      }
      err = fd.eofError(n, err)
      return n, err
   }
}

pollDesc.waitRead 内部调用了 poll.runtime_pollWait --> runtime.poll_runtime_pollWait 来达成无 I/O 事件时 park 住 goroutine 的目的

// poll_runtime_pollWait blocks the calling goroutine until the fd
// behind pd is ready for mode ('r' or 'w'), a deadline fires, or the
// descriptor is closed.
func poll_runtime_pollWait(pd *pollDesc, mode int) int {
   errcode := netpollcheckerr(pd, int32(mode))
   if errcode != pollNoError {
      return errcode
   }
   // As for now only Solaris, illumos, and AIX use level-triggered IO.
   if GOOS == "solaris" || GOOS == "illumos" || GOOS == "aix" {
      netpollarm(pd, mode)
   }
   // netpollblock reports whether the expected I/O event has arrived;
   // loop until the fd is actually ready (spurious wakeups retry)
   for !netpollblock(pd, int32(mode), false) {
      errcode = netpollcheckerr(pd, int32(mode))
      if errcode != pollNoError {
         return errcode
      }
      // Can happen if timeout has fired and unblocked us,
      // but before we had a chance to run, timeout has been reset.
      // Pretend it has not happened and retry.
   }
   return pollNoError
}

// netpollblock reports whether the I/O event for mode is ready,
// parking the current goroutine (via gopark) to wait when it is not.
func netpollblock(pd *pollDesc, mode int32, waitio bool) bool {
   // gpp points at the semaphore word for this direction: pd.rg for
   // reads, pd.wg for writes. The word holds 0, pdReady, pdWait, or
   // the g that gopark stores while it waits for I/O readiness.
   gpp := &pd.rg
   if mode == 'w' {
      gpp = &pd.wg
   }

   // set the gpp semaphore to pdWait
   // loop until the word is either consumed as ready or claimed as pdWait
   for {
      old := *gpp
      if old == pdReady {
         *gpp = 0
         return true
      }
      if old != 0 {
         throw("runtime: double wait")
      }
      // no pending I/O event: CAS the word from 0 to pdWait and stop looping
      if atomic.Casuintptr(gpp, 0, pdWait) {
         break
      }
   }

   // need to recheck error states after setting gpp to pdWait
   // this is necessary because runtime_pollUnblock/runtime_pollSetDeadline/deadlineimpl
   // do the opposite: store to closing/rd/wd, membarrier, load of rg/wg
   // waitio is false here; netpollcheckerr verifies the fd behind this
   // pollDesc is still healthy. Normally netpollcheckerr(pd, mode) == 0
   // holds, so gopark runs and the goroutine stays parked until the fd
   // sees a readable/writable (or other expected) I/O event, after
   // which it is unparked. Inside gopark the goroutine's g is stored
   // into gpp (pollDesc.rg/pollDesc.wg), so a later netpoll call can
   // pull it out of the pollDesc, add it to a run list, and reschedule it.
   if waitio || netpollcheckerr(pd, mode) == 0 {
      // netpollblockcommit is registered as gopark's callback and runs
      // inside it, publishing the current goroutine into gpp
      gopark(netpollblockcommit, unsafe.Pointer(gpp), waitReasonIOWait, traceEvGoBlockNet, 5)
   }
   // be careful to not lose concurrent pdReady notification
   old := atomic.Xchguintptr(gpp, 0)
   if old > pdWait {
      throw("runtime: corrupted polldesc")
   }
   return old == pdReady
}

// gopark parks the current goroutine: it records the wait reason and
// the unlockf callback on the M, then switches to the scheduler stack
// (mcall) where park_m completes the transition to _Gwaiting.
func gopark(unlockf func(*g, unsafe.Pointer) bool, lock unsafe.Pointer, reason waitReason, traceEv byte, traceskip int) {
   if reason != waitReasonSleep {
      checkTimeouts() // timeouts may expire while two goroutines keep the scheduler busy
   }
   // pin the current M
   mp := acquirem()
   // the goroutine being parked
   gp := mp.curg
   status := readgstatus(gp)
   if status != _Grunning && status != _Gscanrunning {
      throw("gopark: bad g status")
   }
   mp.waitlock = lock
   mp.waitunlockf = unlockf
   gp.waitreason = reason
   mp.waittraceev = traceEv
   mp.waittraceskip = traceskip
   releasem(mp)
   // can't do anything that might move the G between Ms here.
   // switch to g0; park_m will invoke the unlockf passed in above
   mcall(park_m)
}

// park_m runs on the g0 stack: it moves gp to _Gwaiting, detaches it
// from the M, runs the commit callback, and schedules the next
// goroutine.
func park_m(gp *g) {
   _g_ := getg()

   if trace.enabled {
      traceGoPark(_g_.m.waittraceev, _g_.m.waittraceskip)
   }

   casgstatus(gp, _Grunning, _Gwaiting)
   dropg()

   if fn := _g_.m.waitunlockf; fn != nil {
      // for netpoll, waitunlockf is netpollblockcommit, which stores
      // gp into pollDesc.rg/wg
      ok := fn(gp, _g_.m.waitlock)
      _g_.m.waitunlockf = nil
      _g_.m.waitlock = nil
      if !ok {
         // commit failed (event already ready): run gp again right away
         if trace.enabled {
            traceGoUnpark(gp, 2)
         }
         casgstatus(gp, _Gwaiting, _Grunnable)
         execute(gp, true) // Schedule it back, never returns.
      }
   }
   schedule()
}

总之就是将当前goroutine的g保存到pollDesc的rg或者wg中,再通过schedule调度其他的goroutine去运行

image.png

netpoll

上面分析完netpoll通过gopark达到阻塞Accept/Read/Write的效果,gopark会将goroutine存在pollDesc的wg/rg,并且此时g的状态从_Grunning切换到_Gwaiting,而这个g要通过goready去唤醒.
当I/O事件发生后,是通过epoll_wait来唤醒被gopark的goroutine的,调用netpoll的地方如下图所示:

image.png

// netpoll polls the epoll instance and returns the list of goroutines
// made runnable by ready I/O. delay < 0 blocks indefinitely, delay == 0
// polls without blocking, delay > 0 blocks for up to delay ns.
func netpoll(delay int64) gList {
   if epfd == -1 {
      return gList{}
   }
   var waitms int32
   // delay is in nanoseconds; epollwait takes milliseconds
   if delay < 0 {
      waitms = -1
   } else if delay == 0 {
      waitms = 0
   } else if delay < 1e6 {
      waitms = 1
   } else if delay < 1e15 {
      waitms = int32(delay / 1e6)
   } else {
      // An arbitrary cap on how long to wait for a timer.
      // 1e9 ms == ~11.5 days.
      waitms = 1e9
   }
   var events [128]epollevent
retry:
   // wait for ready fds; n is the number of events returned
   n := epollwait(epfd, &events[0], int32(len(events)), waitms)
   // negative return value: retry unless it is a real failure
   if n < 0 {
      if n != -_EINTR {
         println("runtime: epollwait on fd", epfd, "failed with", -n)
         throw("runtime: netpoll failed")
      }
      // If a timed sleep was interrupted, just return to
      // recalculate how long we should sleep now.
      if waitms > 0 {
         return gList{}
      }
      goto retry
   }
   // toRun accumulates the goroutines unblocked by these I/O events
   var toRun gList
   for i := int32(0); i < n; i++ {
      ev := &events[i]
      if ev.events == 0 {
         continue
      }

      // wake-up pipe event (netpollBreak), not a real socket
      if *(**uintptr)(unsafe.Pointer(&ev.data)) == &netpollBreakRd {
         if ev.events != _EPOLLIN {
            println("runtime: netpoll: break fd ready for", ev.events)
            throw("runtime: netpoll: break fd ready for something unexpected")
         }
         if delay != 0 {
            // netpollBreak could be picked up by a
            // nonblocking poll. Only read the byte
            // if blocking.
            var tmp [16]byte
            read(int32(netpollBreakRd), noescape(unsafe.Pointer(&tmp[0])), int32(len(tmp)))
            atomic.Store(&netpollWakeSig, 0)
         }
         continue
      }
      // derive mode ('r', 'w', or 'r'+'w') from the event mask; it
      // decides whether rg, wg, or both goroutines get woken
      var mode int32
      if ev.events&(_EPOLLIN|_EPOLLRDHUP|_EPOLLHUP|_EPOLLERR) != 0 {
         mode += 'r'
      }
      if ev.events&(_EPOLLOUT|_EPOLLHUP|_EPOLLERR) != 0 {
         mode += 'w'
      }
      if mode != 0 {
         pd := *(**pollDesc)(unsafe.Pointer(&ev.data))
         pd.everr = false
         if ev.events == _EPOLLERR {
            pd.everr = true
         }
         netpollready(&toRun, pd, mode)
      }
   }
   return toRun
}

// netpollready collects the goroutine(s) parked on pd that the given
// mode makes runnable (reader, writer, or both) and appends them to
// toRun.
func netpollready(toRun *gList, pd *pollDesc, mode int32) {
   wantRead := mode == 'r' || mode == 'r'+'w'
   wantWrite := mode == 'w' || mode == 'r'+'w'
   var rg, wg *g
   if wantRead {
      rg = netpollunblock(pd, 'r', true)
   }
   if wantWrite {
      wg = netpollunblock(pd, 'w', true)
   }
   // push the unblocked goroutines (reader first) onto the run list
   if rg != nil {
      toRun.push(rg)
   }
   if wg != nil {
      toRun.push(wg)
   }
}

// netpollunblock retrieves the goroutine parked on pd for mode, if
// any, marking the semaphore word pdReady when ioready is true.
func netpollunblock(pd *pollDesc, mode int32, ioready bool) *g {
   gpp := &pd.rg
   // pick the read or write semaphore word based on mode
   if mode == 'w' {
      gpp = &pd.wg
   }

   for {
      // load the word: 0, pdReady, pdWait, or a parked g pointer
      old := *gpp
      if old == pdReady {
         return nil
      }
      if old == 0 && !ioready {
         // Only set pdReady for ioready. runtime_pollWait
         // will check for timeout/cancel before waiting.
         return nil
      }
      var new uintptr
      if ioready {
         new = pdReady
      }
      if atomic.Casuintptr(gpp, old, new) {
         if old == pdWait {
            old = 0
         }
         // old is now either 0 or the parked goroutine; return it as *g
         return (*g)(unsafe.Pointer(old))
      }
   }
}

sysmon中,会调用netpoll,代码如下:

    // (sysmon excerpt) non-blocking poll of the netpoller; any ready
    // goroutines are injected into the scheduler's run queues.
    list := netpoll(0) // non-blocking - returns list of goroutines
    if !list.empty() {
       // Need to decrement number of idle locked M's
       // (pretending that one more is running) before injectglist.
       // Otherwise it can lead to the following situation:
       // injectglist grabs all P's but before it starts M's to run the P's,
       // another M returns from syscall, finishes running its G,
       // observes that there is no work to do and no other running M's
       // and reports deadlock.
       incidlelocked(-1)
       injectglist(&list)
       incidlelocked(1)
    }
// injectglist marks every g on glist runnable and distributes them:
// up to one g per idle P goes onto the global run queue (starting Ms
// to run them); the remainder goes to the current P's local run queue.
func injectglist(glist *gList) {
   if glist.empty() {
      return
   }
   if trace.enabled {
      for gp := glist.head.ptr(); gp != nil; gp = gp.schedlink.ptr() {
         traceGoUnpark(gp, 0)
      }
   }

   // Mark all the goroutines as runnable before we put them
   // on the run queues.
   head := glist.head.ptr()
   var tail *g
   // qsize counts the list's length as we walk it
   qsize := 0
   for gp := head; gp != nil; gp = gp.schedlink.ptr() {
      tail = gp
      qsize++
      casgstatus(gp, _Gwaiting, _Grunnable)
   }

   // Turn the gList into a gQueue.
   var q gQueue
   q.head.set(head)
   q.tail.set(tail)
   *glist = gList{}

   // startIdle starts up to n Ms, bounded by the number of idle Ps,
   // so the newly runnable goroutines actually get executed
   startIdle := func(n int) {
      for ; n != 0 && sched.npidle != 0; n-- {
         startm(nil, false)
      }
   }

   pp := getg().m.p.ptr()
   // no P attached: push everything onto the global run queue and return
   if pp == nil {
      lock(&sched.lock)
      globrunqputbatch(&q, int32(qsize))
      unlock(&sched.lock)
      startIdle(qsize)
      return
   }
    // number of currently idle Ps
   npidle := int(atomic.Load(&sched.npidle))
   var globq gQueue
   var n int
   for n = 0; n < npidle && !q.empty(); n++ {
      g := q.pop()
      // move one g into globq for each idle P
      globq.pushBack(g)
   }
   // push globq onto the *global* run queue (the original note said
   // "idle queue" — copy slip) and start Ms for the idle Ps
   if n > 0 {
      lock(&sched.lock)
      globrunqputbatch(&globq, int32(n))
      unlock(&sched.lock)
      startIdle(n)
      qsize -= n
   }
    // leftovers (more gs than idle Ps): the current g has a P, so put
    // the remaining gs on that P's local run queue
   if !q.empty() {
      runqputbatch(pp, &q, qsize)
   }
}

image.png