这是我参与「第五届青训营」笔记创作活动的第14天

IO多路复用

IO模型

IO在计算机中指输入和输出。程序在运行时操作的数据必须在内存中，涉及到数据交换的地方，通常是磁盘、网络等，就需要IO。

进程无法直接操作I/O设备，必须通过系统调用，先把数据读取到内核缓冲区，再从内核缓冲区复制数据到进程空间。

BIO(同步阻塞IO)：进程发起IO系统调用后，进程被阻塞，转到内核空间处理，整个IO处理完毕后返回进程，操作成功则进程获取到数据。

NIO(同步非阻塞IO)：进程发起IO系统调用后，进程不阻塞，不断轮询查看内核缓冲区是否有数据

IO复用(常被称为异步阻塞IO)：一个线程内同时检查多个Socket的内核缓冲区是否准备好数据，可用则拷贝到用户空间处理。需要注意的是，数据从内核拷贝到用户空间这一步仍由进程自己同步完成，因此严格来说IO复用仍属于同步IO。

异步IO(异步非阻塞IO)：内核数据准备好，并拷贝到用户态后通知线程处理

select&poll&epoll

三者都是Linux内核提供的IO多路复用机制

select

select()机制中提供一种fd_set的数据结构，实际上是一个long类型的数组，作为位图使用：其中每一位都能与一个打开的文件句柄（不管是socket句柄，还是其他文件或命名管道或设备句柄）建立联系，建立联系的工作由程序员完成，当调用select()时，由内核根据IO状态修改fd_set的内容，由此来通知执行了select()的进程哪一个socket或文件发生了可读或可写事件。

每次调用 select，都需要把 fd 集合从用户态拷贝到内核态
内核采用的是线性遍历的方式，fd较多时开销大
select 支持的文件描述符数量有限，默认是 1024

poll

poll 的原理与 select 非常相似，区别在于 poll 不再使用固定大小的位图，而是由调用者传入 pollfd 结构体数组（内核内部以链表形式组织这些 pollfd），因此不受 select 的数量限制

epoll

epoll是被动触发，在获取事件的时候，无须遍历整个被侦听的描述符集，只要遍历那些被内核IO事件异步唤醒而加入Ready队列的描述符集合就行了

就绪列表是 fd 事件就绪之后放置的特殊地点，epoll 池只需要遍历这个就绪链表，就能给用户返回所有已经就绪的 fd 数组
内部使用高效的红黑树结构管理 fd，使增、删、改操作的性能保持均衡
水平触发LT：当被监控的文件描述符上有可读写事件发生时，epoll_wait()会通知处理程序去读写。如果这次没有把数据一次性全部读写完(如读写缓冲区太小)，那么下次调用 epoll_wait()时，它还会通知你在上次没读写完的文件描述符上继续读写
边缘触发ET：当被监控的文件描述符上有可读写事件发生时，epoll_wait()会通知处理程序去读写。如果这次没有把数据全部读写完(如读写缓冲区太小)，那么下次调用epoll_wait()时，它不会通知你，也就是它只会通知你一次，直到该文件描述符上出现第二次可读写事件才会通知你

简单实践

这里是Go语言的IO多路复用TCP服务器简单实现：

epoll相关

创建Epoll

// MkEpoll creates a new epoll instance via epoll_create1 (flags = 0) and
// wraps it in an epoll value with an empty fd -> connection table.
// It returns the error from epoll_create1 unchanged when creation fails.
func MkEpoll() (*epoll, error) {
	epfd, createErr := unix.EpollCreate1(0)
	if createErr != nil {
		return nil, createErr
	}

	poller := &epoll{
		fd:          epfd,
		lock:        &sync.RWMutex{},
		connections: make(map[int]net.Conn),
	}
	return poller, nil
}

Epoll池注册事件

// Add registers conn with the epoll instance for readability and hang-up
// events and records it in the fd -> connection table.
//
// It returns the error from epoll_ctl unchanged if registration fails; in that
// case the connection is not recorded.
func (e *epoll) Add(conn net.Conn) error {
	// Extract the file descriptor associated with the connection.
	fd := socketFD(conn)

	// Take the write lock BEFORE registering with the kernel. Wait resolves
	// ready fds under the read lock, so holding the lock across epoll_ctl
	// guarantees that an event for this fd can never be looked up before the
	// map entry below exists.
	e.lock.Lock()
	defer e.lock.Unlock()

	// Use the EPOLL* event constants (not the poll(2) POLL* ones) for an
	// epoll event mask.
	ev := unix.EpollEvent{Events: unix.EPOLLIN | unix.EPOLLHUP, Fd: int32(fd)}
	if err := unix.EpollCtl(e.fd, syscall.EPOLL_CTL_ADD, fd, &ev); err != nil {
		return err
	}

	e.connections[fd] = conn
	// Log the table size every time it reaches a multiple of 100.
	if total := len(e.connections); total%100 == 0 {
		log.Printf("total number of connections: %v", total)
	}
	return nil
}

注册前需要先从连接中取出对应的文件描述符(fd)

// socketFD returns the operating-system file descriptor backing conn.
//
// It uses the public syscall.Conn interface rather than reflecting into the
// unexported fields of the net package, whose layout is an implementation
// detail and may change between Go releases.
//
// It returns -1 when no descriptor can be obtained: conn is nil, conn does not
// implement syscall.Conn, or the raw connection reports an error (for example
// because it is already closed). Passing -1 on to epoll_ctl yields an ordinary
// error instead of a panic.
func socketFD(conn net.Conn) int {
	sc, ok := conn.(syscall.Conn)
	if !ok {
		return -1
	}

	raw, err := sc.SyscallConn()
	if err != nil {
		return -1
	}

	fd := -1
	// Control invokes the callback with the descriptor; we only read its value.
	if err := raw.Control(func(sysfd uintptr) { fd = int(sysfd) }); err != nil {
		return -1
	}
	return fd
}

获取已经准备好的就绪队列

// Wait blocks in epoll_wait until at least one registered descriptor is ready
// or the 100 ms timeout expires, and returns the connections that correspond
// to the ready descriptors.
//
// At most 100 events are collected per call. A call interrupted by a signal
// (EINTR) is retried transparently; any other epoll_wait error is returned
// unchanged. When the timeout expires with no events, it returns (nil, nil).
//
// The returned slice never contains nil entries: descriptors that are no
// longer present in the connection table are skipped.
func (e *epoll) Wait() ([]net.Conn, error) {
	events := make([]unix.EpollEvent, 100)

	var n int
	for {
		var err error
		// The third argument is the timeout in milliseconds.
		n, err = unix.EpollWait(e.fd, events, 100)
		if err == nil {
			break
		}
		if err != unix.EINTR {
			return nil, err
		}
	}
	if n == 0 {
		return nil, nil
	}

	e.lock.RLock()
	defer e.lock.RUnlock()

	connections := make([]net.Conn, 0, n)
	for _, ev := range events[:n] {
		// Skip descriptors with no table entry so callers never have to
		// deal with nil connections.
		if conn, ok := e.connections[int(ev.Fd)]; ok {
			connections = append(connections, conn)
		}
	}
	return connections, nil
}

在Epoll中注销该连接

// Remove deregisters conn's file descriptor from the epoll instance and then
// deletes it from the fd -> connection table.
//
// An epoll_ctl call that fails with EINTR is reissued; any other error is
// returned unchanged and the table is left untouched.
func (e *epoll) Remove(conn net.Conn) error {
	fd := socketFD(conn)

	// Keep reissuing the deregistration while it is interrupted by a signal.
	for {
		ctlErr := unix.EpollCtl(e.fd, syscall.EPOLL_CTL_DEL, fd, nil)
		if ctlErr == nil {
			break
		}
		if ctlErr != unix.EINTR {
			return ctlErr
		}
	}

	e.lock.Lock()
	defer e.lock.Unlock()

	delete(e.connections, fd)
	// Log the table size every time it lands on a multiple of 100.
	if remaining := len(e.connections); remaining%100 == 0 {
		log.Printf("total number of connections: %v", remaining)
	}
	return nil
}

主函数

选择端口号监听

// Listen for TCP connections on port 8080 on all interfaces.
ln, err := net.Listen("tcp", ":8080")
if err != nil {
	// Without a listener there is nothing to accept on; continuing would
	// dereference a nil listener in the accept loop.
	log.Fatalf("listen err: %v", err)
}

创建epoll，并把接收到的连接加入epoll

    ln, err := net.Listen("tcp", ":8080")
	for {
		conn, e := ln.Accept()
		if e != nil {
			if ne, ok := e.(net.Error); ok && ne.Temporary() {
				log.Printf("accept temp err: %v", ne)
				continue
			}

			log.Printf("accept err: %v", e)
			return
		}

		if err := epoller.Add(conn); err != nil {
			log.Printf("failed to add connection %v", err)
			conn.Close()
		}
	}

遍历就绪队列并处理数据。由于下面的循环与上面的 Accept 循环都会阻塞，二者需要并发运行（例如把其中一个放到单独的 goroutine 中执行）

	// Event loop: repeatedly fetch the ready connections from epoll and echo
	// one 8-byte message back on each of them.
	for {
		connections, err := epoller.Wait()
		if err != nil {
			log.Printf("failed to epoll wait %v", err)
			continue
		}
		for _, conn := range connections {
			if conn == nil {
				// Skip just this entry; breaking out here would drop every
				// remaining ready connection in the batch.
				continue
			}

			// Echo the message (a timestamp) back unchanged. CopyN returns
			// only after 8 bytes were copied or an error occurred.
			_, err = io.CopyN(conn, conn, 8)
			if err != nil {
				// On any read/write error, deregister the connection from
				// epoll and close it.
				if err := epoller.Remove(conn); err != nil {
					log.Printf("failed to remove %v", err)
				}
				conn.Close()
			}

			opsRate.Mark(1)
		}
	}

IO多路复用 ｜ 青训营笔记