netpoll (Part 2) | Youth Training Camp Notes


This is day 16 of my participation in the note-writing challenge of the 5th Youth Training Camp.

netpoll

In yesterday's notes I learned that the Listener is responsible for obtaining the fd of the listening Socket; the EventLoop takes in the options and callbacks and starts the Server, and the Server picks a poll from the poll pool and registers the events for that fd with it.

PollManager

The reason I couldn't find where poll's Wait method gets called yesterday is that I hadn't yet come across this global variable:

var pollmanager *manager

As everyone knows (I, still new to Go, only just learned this), a package's init() function is called implicitly when the package is imported, and that is exactly where this global variable gets initialized:

func init() {
	var loops = runtime.GOMAXPROCS(0)/20 + 1
	pollmanager = &manager{}
	pollmanager.SetLoadBalance(RoundRobin) // set the load-balancing strategy
	pollmanager.SetNumLoops(loops)         // set the number of epoll instances

	setLoggerOutput(os.Stderr)
}
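Note how the formula makes a single poller the common case: with integer division, GOMAXPROCS(0) = 8 gives 8/20 + 1 = 1 poller, and you only reach 3 pollers once GOMAXPROCS hits 40.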

Below are the implementations of the two load-balancing strategies:

// random pick
func (b *randomLB) Pick() (poll Poll) {
	idx := fastrand.Intn(b.pollSize)
	return b.polls[idx]
}

// round-robin pick
func (b *roundRobinLB) Pick() (poll Poll) {
	idx := int(atomic.AddUintptr(&b.accepted, 1)) % b.pollSize
	return b.polls[idx]
}
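To make the round-robin picker concrete, here is a small self-contained toy (my illustration, not netpoll code; plain strings stand in for the real Poll instances). The atomic counter keeps Pick safe when many connections arrive at once, and the modulo spreads them evenly across the pollers.

package main

import (
	"fmt"
	"sync/atomic"
)

type toyRoundRobin struct {
	accepted uintptr
	polls    []string // stand-ins for the real []Poll
}

func (b *toyRoundRobin) Pick() string {
	// atomically advance the counter, then wrap it onto the poll slice
	idx := int(atomic.AddUintptr(&b.accepted, 1)) % len(b.polls)
	return b.polls[idx]
}

func main() {
	lb := &toyRoundRobin{polls: []string{"poll-0", "poll-1", "poll-2"}}
	for i := 0; i < 6; i++ {
		fmt.Println(lb.Pick()) // poll-1, poll-2, poll-0, poll-1, poll-2, poll-0
	}
}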

When the number of epolls is set, pollers are opened or closed so that the actual number of poll structs matches the configured value; each struct then creates its own epoll via a system call, and a goroutine is spawned to block in Wait receiving events:

// Run all pollers.
func (m *manager) Run() error {
	// new poll to fill delta.
	for idx := len(m.polls); idx < m.NumLoops; idx++ {
		var poll = openPoll()
		m.polls = append(m.polls, poll)
		go poll.Wait()
	}
	// LoadBalance must be set before calling Run, otherwise it will panic.
	m.balance.Rebalance(m.polls)
	return nil
}
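Run() above only covers the growing side. As far as I can tell, SetNumLoops also has to handle the shrinking side, closing surplus pollers and rebalancing; the following is only a sketch of that idea based on the description above, not a verbatim copy of the netpoll source.

func (m *manager) SetNumLoops(numLoops int) error {
	if numLoops < 1 {
		return fmt.Errorf("set invalid numLoops[%d]", numLoops)
	}
	if numLoops < m.NumLoops {
		// shrinking: keep the first numLoops pollers and close the rest
		var polls = make([]Poll, numLoops)
		for idx := 0; idx < m.NumLoops; idx++ {
			if idx < numLoops {
				polls[idx] = m.polls[idx]
			} else if err := m.polls[idx].Close(); err != nil {
				logger.Printf("NETPOLL: poller close failed: %v", err)
			}
		}
		m.NumLoops = numLoops
		m.polls = polls
		m.balance.Rebalance(m.polls)
		return nil
	}
	// growing: record the new target and let Run() open the missing pollers
	m.NumLoops = numLoops
	return m.Run()
}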

After events are received, they are passed to the handler function for processing:

func (p *defaultPoll) Wait() (err error) {
	// init
	var caps, msec, n = barriercap, -1, 0
	p.Reset(128, caps)
	// wait
	for {
		if n == p.size && p.size < 128*1024 {
			p.Reset(p.size<<1, caps)
		}
		n, err = EpollWait(p.fd, p.events, msec)
		if err != nil && err != syscall.EINTR {
			return err
		}
		if n <= 0 {
			msec = -1 // nothing happened: let the next EpollWait block
			runtime.Gosched()
			continue
		}
		msec = 0 // events are flowing: poll again without blocking
		if p.Handler(p.events[:n]) {
			return nil
		}
		// we can make sure that there is no op remaining if Handler finished
		p.opcache.free()
	}
}

The handler looks up, via the poll, the FDOperator owned by the server it belongs to, and performs different operations depending on the event, e.g. error/hang-up handling, reading, or writing:

func (p *defaultPoll) handler(events []epollevent) (closed bool) {
	for i := range events {
		operator := p.getOperator(0, unsafe.Pointer(&events[i].data)) // look up the FDOperator for this event
		if operator == nil || !operator.do() {
			continue
		}
		// trigger or exit gracefully
		if operator.FD == p.wop.FD {
			// must clean trigger first
			syscall.Read(p.wop.FD, p.buf)
			atomic.StoreUint32(&p.trigger, 0)
			// if closed & exit
			if p.buf[0] > 0 {
				syscall.Close(p.wop.FD)
				syscall.Close(p.fd)
				operator.done()
				return true
			}
			operator.done()
			continue
		}

		evt := events[i].events
		// check poll in
		if evt&syscall.EPOLLIN != 0 {
			if operator.OnRead != nil {
				// for non-connection
				operator.OnRead(p)
			} else if operator.Inputs != nil {
				// for connection
				var bs = operator.Inputs(p.barriers[i].bs)
				if len(bs) > 0 {
					var n, err = ioread(operator.FD, bs, p.barriers[i].ivs)
					operator.InputAck(n)
					if err != nil {
						p.appendHup(operator)
						continue
					}
				}
			} else {
				logger.Printf("NETPOLL: operator has critical problem! event=%d operator=%v", evt, operator)
			}
		}

		// check hup
		if evt&(syscall.EPOLLHUP|syscall.EPOLLRDHUP) != 0 {
			p.appendHup(operator)
			continue
		}
		if evt&syscall.EPOLLERR != 0 {
			// Under block-zerocopy, the kernel may give an error callback, which is not a real error, just an EAGAIN.
			// So here we need to check this error, if it is EAGAIN then do nothing, otherwise still mark as hup.
			if _, _, _, _, err := syscall.Recvmsg(operator.FD, nil, nil, syscall.MSG_ERRQUEUE); err != syscall.EAGAIN {
				p.appendHup(operator)
			} else {
				operator.done()
			}
			continue
		}
		// check poll out
		if evt&syscall.EPOLLOUT != 0 {
			if operator.OnWrite != nil {
				// for non-connection
				operator.OnWrite(p)
			} else if operator.Outputs != nil {
				// for connection
				var bs, supportZeroCopy = operator.Outputs(p.barriers[i].bs)
				if len(bs) > 0 {
					// TODO: Let the upper layer pass in whether to use ZeroCopy.
					var n, err = iosend(operator.FD, bs, p.barriers[i].ivs, false && supportZeroCopy)
					operator.OutputAck(n)
					if err != nil {
						p.appendHup(operator)
						continue
					}
				}
			} else {
				logger.Printf("NETPOLL: operator has critical problem! event=%d operator=%v", evt, operator)
			}
		}
		operator.done()
	}
	// hup conns together to avoid blocking the poll.
	p.onhups()
	return false
}
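For reference, the FDOperator fields that this handler touches look roughly like the sketch below; it is trimmed down from my reading of the source, with my own comments, and it omits the internal bookkeeping fields.

type FDOperator struct {
	// FD is the file descriptor this operator is bound to.
	FD int

	// Callbacks for non-connection fds (for example the listener).
	OnRead  func(p Poll) error
	OnWrite func(p Poll) error
	OnHup   func(p Poll) error

	// Zero-copy hooks for connection fds: Inputs/Outputs hand out buffers to
	// read into or write from, InputAck/OutputAck report how many bytes moved.
	Inputs    func(vs [][]byte) (rs [][]byte)
	InputAck  func(n int) (err error)
	Outputs   func(vs [][]byte) (rs [][]byte, supportZeroCopy bool)
	OutputAck func(n int) (err error)

	// poll is the poller this fd is registered with.
	poll Poll
	// ... bookkeeping fields omitted
}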

Connection

When a client connects, the server initializes the connection struct for it:

// init initialize the connection with options
func (c *connection) init(conn Conn, opts *options) (err error) {
	// init buffer, barrier, finalizer
	c.readTrigger = make(chan struct{}, 1)
	c.writeTrigger = make(chan error, 1)
	c.bookSize, c.maxSize = block1k/2, pagesize
	c.inputBuffer, c.outputBuffer = NewLinkBuffer(pagesize), NewLinkBuffer()
	c.inputBarrier, c.outputBarrier = barrierPool.Get().(*barrier), barrierPool.Get().(*barrier)

	c.initNetFD(conn) // conn must be *netFD{}
	c.initFDOperator()
	c.initFinalizer()

	syscall.SetNonblock(c.fd, true)
	// enable TCP_NODELAY by default
	switch c.network {
	case "tcp", "tcp4", "tcp6":
		setTCPNoDelay(c.fd, true)
	}
	// check zero-copy
	if setZeroCopy(c.fd) == nil && setBlockZeroCopySend(c.fd, defaultZeroCopyTimeoutSec, 0) == nil {
		c.supportZeroCopy = true
	}

	// connection initialized and prepare options
	return c.onPrepare(opts)
}

Then it prepares the connection's callbacks:

// OnPrepare supports close connection, but not read/write data.
// connection will be registered by this call after preparing.
func (c *connection) onPrepare(opts *options) (err error) {
	if opts != nil {
		c.SetOnConnect(opts.onConnect)
		c.SetOnRequest(opts.onRequest)
		c.SetReadTimeout(opts.readTimeout)
		c.SetWriteTimeout(opts.writeTimeout)
		c.SetIdleTimeout(opts.idleTimeout)

		// calling prepare first and then register.
		if opts.onPrepare != nil {
			c.ctx = opts.onPrepare(c)
		}
	}

	if c.ctx == nil {
		c.ctx = context.Background()
	}
	// prepare may close the connection.
	if c.IsActive() {
		return c.register()
	}
	return nil
}

register is where the connection's event gets registered with an epoll:

// register only use for connection register into poll.
func (c *connection) register() (err error) {
	if c.operator.isUnused() {
		// operator is not registered
		err = c.operator.Control(PollReadable)
	} else {
		// operator is already registered
		// change event to wait read new data
		err = c.operator.Control(PollModReadable)
	}
	if err != nil {
		logger.Printf("NETPOLL: connection register failed: %v", err)
		c.Close()
		return Exception(ErrConnClosed, err.Error())
	}
	return nil
}
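As far as I can tell, the poll itself is picked a bit earlier, when the FDOperator is set up, and Control just forwards to that poll, which in turn issues the epoll_ctl call. The chain looks roughly like this (a simplified sketch pieced together from my reading, not verbatim source):

// connection.initFDOperator (simplified): bind the connection's fd to a poller
// chosen by the load balancer, and wire up the hooks used by the handler above.
func (c *connection) initFDOperator() {
	poll := pollmanager.Pick() // subReactor selection happens here
	op := poll.Alloc()
	op.FD = c.fd
	op.Inputs, op.InputAck = c.inputs, c.inputAck
	op.Outputs, op.OutputAck = c.outputs, c.outputAck
	c.operator = op
}

// FDOperator.Control (simplified): delegate to the chosen poll, which maps the
// event to an epoll_ctl ADD/MOD/DEL on its epoll fd.
func (op *FDOperator) Control(event PollEvent) error {
	return op.poll.Control(op, event)
}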

Reading this far, I was a bit puzzled: the epoll events for the connection are only being registered now, so what was registered before? I went and looked it up.

Multi-Reactor

netpoll appears to use the Multi-Reactor variant of the Reactor model (a toy sketch follows the list below).

  • A single mainReactor is mainly responsible for accepting client connection requests; once a new connection is established, the mainReactor hands it off to one of the subReactors according to some load-balancing strategy.
  • Multiple subReactors each manage the client connections assigned to them and handle all of those clients' subsequent requests.
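To picture the split, here is a toy Multi-Reactor in plain Go (my illustration only, not netpoll code): one acceptor goroutine plays the mainReactor, a few worker loops play the subReactors, and accepted connections are handed off round-robin, much like pollmanager.Pick(). A real subReactor would add the fd to an epoll set instead of doing a blocking echo.

package main

import (
	"log"
	"net"
)

// subReactor owns the connections handed to it and serves their I/O.
func subReactor(conns <-chan net.Conn) {
	buf := make([]byte, 4096)
	for c := range conns {
		// toy version: one blocking echo; netpoll would register the fd
		// with this subReactor's epoll and react to readable events instead
		if n, err := c.Read(buf); err == nil {
			c.Write(buf[:n])
		}
		c.Close()
	}
}

func main() {
	ln, err := net.Listen("tcp", "127.0.0.1:8080")
	if err != nil {
		log.Fatal(err)
	}
	// start a fixed set of subReactors
	subs := make([]chan net.Conn, 3)
	for i := range subs {
		subs[i] = make(chan net.Conn)
		go subReactor(subs[i])
	}
	// mainReactor: only accepts, then dispatches round-robin
	for i := 0; ; i++ {
		c, err := ln.Accept()
		if err != nil {
			return
		}
		subs[i%len(subs)] <- c
	}
}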

That cleared up my confusion. As I understand it, netpoll's workflow is as follows (a user-level usage sketch comes after the list):

  1. Initialize the poll pool, which contains several epoll instances.
  2. Listen on a port, register the callbacks, and pick one epoll from the pool to watch the listening socket's fd; this one acts as the mainReactor.
  3. When the mainReactor establishes a connection, pick an epoll from the pool to watch that connection's fd; it acts as a subReactor.
  4. Each subReactor watches its connections' events and invokes the corresponding callbacks.
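All of this hides behind a small public API. Based on my reading of netpoll's examples (exact option names and signatures may differ between versions), a minimal echo server looks roughly like this:

package main

import (
	"context"
	"time"

	"github.com/cloudwego/netpoll"
)

func main() {
	// step 2: listen, then hand the listener to the event loop
	listener, err := netpoll.CreateListener("tcp", "127.0.0.1:8080")
	if err != nil {
		panic(err)
	}
	// step 4: handle is the OnRequest callback a subReactor fires when data is readable
	eventLoop, err := netpoll.NewEventLoop(handle, netpoll.WithReadTimeout(time.Second))
	if err != nil {
		panic(err)
	}
	// blocks accepting connections; step 3 happens inside
	_ = eventLoop.Serve(listener)
}

// handle echoes whatever arrives back to the client.
func handle(ctx context.Context, conn netpoll.Connection) error {
	reader := conn.Reader()
	defer reader.Release()
	msg, err := reader.Next(reader.Len())
	if err != nil {
		return err
	}
	_, err = conn.Write(msg)
	return err
}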

Over these two days I have only roughly traced the workflow; the concrete implementation and design details are something I will dig into later.