- 经过之前的学习,咱们已经学会了计时器模块、idle、prepare、check 模块。下面咱们继续学习
uv_run
中最后一个,也是最难的一个模块 —— poll io
poll io
是 libuv 非常重要的一个阶段,文件 io、网络 io、信号处理等都在这 个阶段处理
uv_run
int uv_run(uv_loop_t* loop, uv_run_mode mode) {
int timeout;
int r;
int ran_pending;
r = uv__loop_alive(loop);
if (!r)
uv__update_time(loop);
while (r != 0 && loop->stop_flag == 0) {
uv__update_time(loop);
uv__run_timers(loop);
ran_pending = uv__run_pending(loop);
uv__run_idle(loop);
uv__run_prepare(loop);
timeout = 0;
if ((mode == UV_RUN_ONCE && !ran_pending) || mode == UV_RUN_DEFAULT)
timeout = uv__backend_timeout(loop);
uv__io_poll(loop, timeout);
uv__metrics_update_idle_time(loop);
uv__run_check(loop);
uv__run_closing_handles(loop);
if (mode == UV_RUN_ONCE) {
uv__update_time(loop);
uv__run_timers(loop);
}
r = uv__loop_alive(loop);
if (mode == UV_RUN_ONCE || mode == UV_RUN_NOWAIT)
break;
}
if (loop->stop_flag != 0)
loop->stop_flag = 0;
return r;
}
源码解析
- 在正式讲解
uv__io_poll
源码之前,首先让咱们来认识一个核心概念和数据结构 ———— io观察者
io 观察者
基本操作
uv__io_init
void uv__io_init(uv__io_t* w, uv__io_cb cb, int fd) {
assert(cb != NULL);
assert(fd >= -1);
QUEUE_INIT(&w->pending_queue);
QUEUE_INIT(&w->watcher_queue);
w->cb = cb;
w->fd = fd;
w->events = 0;
w->pevents = 0;
#if defined(UV_HAVE_KQUEUE)
w->rcount = 0;
w->wcount = 0;
#endif
}
uv__io_start
void uv__io_start(uv_loop_t* loop, uv__io_t* w, unsigned int events) {
assert(0 == (events & ~(POLLIN | POLLOUT | UV__POLLRDHUP | UV__POLLPRI)));
assert(0 != events);
assert(w->fd >= 0);
assert(w->fd < INT_MAX);
w->pevents |= events;
maybe_resize(loop, w->fd + 1);
#if !defined(__sun)
if (w->events == w->pevents)
return;
#endif
if (QUEUE_EMPTY(&w->watcher_queue))
QUEUE_INSERT_TAIL(&loop->watcher_queue, &w->watcher_queue);
if (loop->watchers[w->fd] == NULL) {
loop->watchers[w->fd] = w;
loop->nfds++;
}
}
uv__io_stop
void uv__io_stop(uv_loop_t* loop, uv__io_t* w, unsigned int events) {
assert(0 == (events & ~(POLLIN | POLLOUT | UV__POLLRDHUP | UV__POLLPRI)));
assert(0 != events);
if (w->fd == -1)
return;
assert(w->fd >= 0);
if ((unsigned) w->fd >= loop->nwatchers)
return;
w->pevents &= ~events;
if (w->pevents == 0) {
QUEUE_REMOVE(&w->watcher_queue);
QUEUE_INIT(&w->watcher_queue);
w->events = 0;
if (w == loop->watchers[w->fd]) {
assert(loop->nfds > 0);
loop->watchers[w->fd] = NULL;
loop->nfds--;
}
}
else if (QUEUE_EMPTY(&w->watcher_queue))
QUEUE_INSERT_TAIL(&loop->watcher_queue, &w->watcher_queue);
}
epoll 相关知识点
- epoll是Linux下多路复用IO接口select/poll的增强版本,它能显著提高程序在大量并发连接中只有少量活跃的情况下的系统CPU利用率,因为它会复用文件描述符集合来传递结果而不用迫使开发者每次等待事件之前都必须重新准备要被侦听的文件描述符集合,另一点原因就是获取事件的时候,它无须遍历整个被侦听的描述符集,只要遍历那些被内核IO事件异步唤醒而加入Ready队列的描述符集合就行了
基础 API
epoll_create
:创建一个epoll句柄,参数size用来告诉内核监听的文件描述符的个数,跟内存大小有关
#include <sys/epoll.h>
int epoll_create(int size) size:监听数目
epoll_ctl
:控制某个epoll监控的文件描述符上的事件:注册、修改、删除
#include <sys/epoll.h>
int epoll_ctl(int epfd, int op, int fd, struct epoll_event *event)
epfd: 为epoll_creat的句柄
op: 表示动作,用3个宏来表示:
EPOLL_CTL_ADD (注册新的fd到epfd),
EPOLL_CTL_MOD (修改已经注册的fd的监听事件),
EPOLL_CTL_DEL (从epfd删除一个fd);
event: 告诉内核需要监听的事件
struct epoll_event {
__uint32_t events;
epoll_data_t data;
};
typedef union epoll_data {
void *ptr;
int fd;
uint32_t u32;
uint64_t u64;
} epoll_data_t;
**EPOLLIN** : 表示对应的文件描述符可以读(包括对端SOCKET正常关闭)
**EPOLLOUT**: 表示对应的文件描述符可以写
EPOLLPRI: 表示对应的文件描述符有紧急的数据可读(这里应该表示有带外数据到来)
**EPOLLERR**: 表示对应的文件描述符发生错误
EPOLLHUP: 表示对应的文件描述符被挂断;
EPOLLET: 将EPOLL设为边缘触发(Edge Triggered)模式,这是相对于水平触发(Level Triggered)而言的
EPOLLONESHOT: 只监听一次事件,当监听完这次事件之后,如果还需要继续监听这个socket的话,需要再次把这个socket加入到EPOLL队列里
epoll_wait
:等待所监控文件描述符上有事件的产生
#include <sys/epoll.h>
int epoll_wait(int epfd, struct epoll_event *events, int maxevents, int timeout)
events: 用来存内核得到事件的集合,
maxevents: 告之内核这个events有多大,这个maxevents的值不能大于创建epoll_create()时的size,
timeout: 是超时时间
-1: 阻塞
0: 立即返回,非阻塞
>0: 指定毫秒
返回值: 成功返回有多少文件描述符就绪,时间到时返回0,出错返回-1
uv__epoll_init
uv__io_poll
第一部分
- 遍历io观察者队列,根据 uv__io_t 句柄初始化 epoll_event 结构体,之后调用 epoll_ctl 来修改文件描述符上的事件
if (loop->nfds == 0) {
assert(QUEUE_EMPTY(&loop->watcher_queue));
return;
}
memset(&e, 0, sizeof(e));
while (!QUEUE_EMPTY(&loop->watcher_queue)) {
q = QUEUE_HEAD(&loop->watcher_queue);
QUEUE_REMOVE(q);
QUEUE_INIT(q);
w = QUEUE_DATA(q, uv__io_t, watcher_queue);
assert(w->pevents != 0);
assert(w->fd >= 0);
assert(w->fd < (int) loop->nwatchers);
e.events = w->pevents;
e.data.fd = w->fd;
if (w->events == 0)
op = EPOLL_CTL_ADD;
else
op = EPOLL_CTL_MOD;
if (epoll_ctl(loop->backend_fd, op, w->fd, &e)) {
if (errno != EEXIST)
abort();
assert(op == EPOLL_CTL_ADD);
if (epoll_ctl(loop->backend_fd, EPOLL_CTL_MOD, w->fd, &e))
abort();
}
w->events = w->pevents;
}
第二部分
- 主要是调用 epoll wait 来监控文件描述符上事件的产生
sigmask = 0;
if (loop->flags & UV_LOOP_BLOCK_SIGPROF) {
sigemptyset(&sigset);
sigaddset(&sigset, SIGPROF);
sigmask |= 1 << (SIGPROF - 1);
}
assert(timeout >= -1);
base = loop->time;
count = 48;
real_timeout = timeout;
if (uv__get_internal_fields(loop)->flags & UV_METRICS_IDLE_TIME) {
reset_timeout = 1;
user_timeout = timeout;
timeout = 0;
} else {
reset_timeout = 0;
user_timeout = 0;
}
no_epoll_pwait = uv__load_relaxed(&no_epoll_pwait_cached);
no_epoll_wait = uv__load_relaxed(&no_epoll_wait_cached);
for (;;) {
if (timeout != 0)
uv__metrics_set_provider_entry_time(loop);
if (sizeof(int32_t) == sizeof(long) && timeout >= max_safe_timeout)
timeout = max_safe_timeout;
if (sigmask != 0 && no_epoll_pwait != 0)
if (pthread_sigmask(SIG_BLOCK, &sigset, NULL))
abort();
if (no_epoll_wait != 0 || (sigmask != 0 && no_epoll_pwait == 0)) {
nfds = epoll_pwait(loop->backend_fd,
events,
ARRAY_SIZE(events),
timeout,
&sigset);
if (nfds == -1 && errno == ENOSYS) {
uv__store_relaxed(&no_epoll_pwait_cached, 1);
no_epoll_pwait = 1;
}
} else {
nfds = epoll_wait(loop->backend_fd,
events,
ARRAY_SIZE(events),
timeout);
if (nfds == -1 && errno == ENOSYS) {
uv__store_relaxed(&no_epoll_wait_cached, 1);
no_epoll_wait = 1;
}
}
if (sigmask != 0 && no_epoll_pwait != 0)
if (pthread_sigmask(SIG_UNBLOCK, &sigset, NULL))
abort();
SAVE_ERRNO(uv__update_time(loop));
第三部分
- 这里开始处理 io 事件,执行 io 观察者里保存的回调。但是有一个特殊的地方 就是信号处理的 io 观察者需要单独判断。他是一个全局的 io 观察者,和一般动态申请和销毁的 io 观察者不一样,他是存在于 libuv 运行的整个生命周期。
if (nfds == 0) {
assert(timeout != -1);
if (reset_timeout != 0) {
timeout = user_timeout;
reset_timeout = 0;
}
if (timeout == -1)
continue;
if (timeout == 0)
return;
goto update_timeout;
}
if (nfds == -1) {
if (errno == ENOSYS) {
assert(no_epoll_wait == 0 || no_epoll_pwait == 0);
continue;
}
if (errno != EINTR)
abort();
if (reset_timeout != 0) {
timeout = user_timeout;
reset_timeout = 0;
}
if (timeout == -1)
continue;
if (timeout == 0)
return;
goto update_timeout;
}
have_signals = 0;
nevents = 0;
{
union {
struct epoll_event* events;
uv__io_t* watchers;
} x;
x.events = events;
assert(loop->watchers != NULL);
loop->watchers[loop->nwatchers] = x.watchers;
loop->watchers[loop->nwatchers + 1] = (void*) (uintptr_t) nfds;
}
for (i = 0; i < nfds; i++) {
pe = events + i;
fd = pe->data.fd;
if (fd == -1)
continue;
assert(fd >= 0);
assert((unsigned) fd < loop->nwatchers);
w = loop->watchers[fd];
if (w == NULL) {
epoll_ctl(loop->backend_fd, EPOLL_CTL_DEL, fd, pe);
continue;
}
pe->events &= w->pevents | POLLERR | POLLHUP;
if (pe->events == POLLERR || pe->events == POLLHUP)
pe->events |=
w->pevents & (POLLIN | POLLOUT | UV__POLLRDHUP | UV__POLLPRI);
if (pe->events != 0) {
if (w == &loop->signal_io_watcher) {
have_signals = 1;
} else {
uv__metrics_update_idle_time(loop);
w->cb(loop, w, pe->events);
}
nevents++;
}
}
if (reset_timeout != 0) {
timeout = user_timeout;
reset_timeout = 0;
}
if (have_signals != 0) {
uv__metrics_update_idle_time(loop);
loop->signal_io_watcher.cb(loop, &loop->signal_io_watcher, POLLIN);
}