Common Server Design Paradigms (Including the Redis 5.0 Network Model)


Linux I/O Models

First, we need to be clear about what the input operation on a socket involves. It has two parts:

  • Waiting for the data to be ready in the kernel (waiting for data to arrive from the network and be copied into a kernel buffer)
  • Copying the data from the kernel buffer into the application process's buffer

If either part blocks, we call the operation synchronous I/O.

In Stevens' classification there are five I/O models on Linux: blocking I/O, nonblocking I/O, I/O multiplexing, signal-driven I/O, and asynchronous I/O. Only asynchronous I/O avoids blocking in both parts, so the first four all count as synchronous I/O.
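To make the distinction concrete, here is a minimal sketch (not taken from any particular textbook implementation) of the nonblocking model: the first part no longer blocks, because recv fails with EAGAIN until data arrives, but the second part, copying from the kernel buffer into the user buffer, still happens inside recv, which is why nonblocking I/O still counts as synchronous I/O. The read_nonblocking name and the busy-polling loop are purely illustrative.

#include <errno.h>
#include <fcntl.h>
#include <sys/socket.h>
#include <sys/types.h>

/* sockfd is assumed to be an already connected TCP socket */
ssize_t read_nonblocking(int sockfd, char *buf, size_t len)
{
    fcntl(sockfd, F_SETFL, fcntl(sockfd, F_GETFL) | O_NONBLOCK);

    for (;;) {
        ssize_t n = recv(sockfd, buf, len, 0);
        if (n >= 0)
            return n;               /* part 2 (kernel-to-user copy) still happened inside recv */
        if (errno == EAGAIN || errno == EWOULDBLOCK)
            continue;               /* part 1 not finished: no data in the kernel yet, poll again */
        return -1;                  /* a real error */
    }
}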

Server Design Paradigms

Basic model

Whatever model the server adopts, the basic building blocks stay the same; the differences lie in how they are combined cleverly and efficiently.

Traditional server design models
Iterative

Clients are served one at a time and have to queue up for their turn, so this model is clearly unsuitable for a busy server.
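A minimal sketch of the iterative pattern (assuming listenfd is already bound and listening; handle_client is a hypothetical request handler, not from the book's code): each client is served to completion before the next accept, so everyone else waits in the kernel's accept queue.

#include <sys/socket.h>
#include <unistd.h>

void serve_iteratively(int listenfd, void (*handle_client)(int))
{
    for (;;) {
        int connfd = accept(listenfd, NULL, NULL);  /* take the next queued client */
        if (connfd < 0)
            continue;
        handle_client(connfd);   /* every other client waits until this one is finished */
        close(connfd);
    }
}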

One process per client

for ( ; ; ) {
		clilen = addrlen; /* copy the size of the client address structure */
		if ( (connfd = accept(listenfd, cliaddr, &clilen)) < 0) { /* accept a client; returns the connected socket */
			if (errno == EINTR) /* interrupted by a signal: just retry */
				continue;		/* back to for() */
			else
				err_sys("accept error"); /* any other error is fatal */
		}
		/* fork a child process */
		if ( (childpid = Fork()) == 0) {	/* child process */
			Close(listenfd);	/* close the listening socket */
			web_child(connfd);	/* process the client's request in this function */
			exit(0); /* terminate the child */
		}
		Close(connfd);			/* parent closes the connected socket and goes back to accept the next client */
	}

Forking one process per client has one big problem: when the number of clients is large, it is very expensive in resources.

One process per client (prefork model)

 

	my_lock_init("/tmp/lock.XXXXXX");
	for (i = 0; i < nchildren; i++) /* create the pool of child processes in a loop */
		pids[i] = child_make(i, listenfd, addrlen);	/* parent returns */
 
pid_t
child_make(int i, int listenfd, int addrlen)
{
	pid_t	pid;
	void	child_main(int, int, int);

	if ( (pid = Fork()) > 0) /* in the parent after forking a child */
		return(pid);		/* parent: return the child's pid to the caller */

	child_main(i, listenfd, addrlen);	/* never returns: the child handles clients in this function */
}
/* end child_make */

/* include child_main */
void
child_main(int i, int listenfd, int addrlen)
{
	int				connfd;
	void			web_child(int);
	socklen_t		clilen;
	struct sockaddr	*cliaddr;
	void            my_lock_wait();
	void            my_lock_release();

	cliaddr = Malloc(addrlen); /* allocate space for the client address structure */

	printf("child %ld starting\n", (long) getpid());
	for ( ; ; ) {
		clilen = addrlen;
		my_lock_wait();
		connfd = Accept(listenfd, cliaddr, &clilen); /* accept a client while holding the lock */
		my_lock_release();

		web_child(connfd);		/* process the request */
		Close(connfd); /* close the connected socket */
	}
}

Introducing a pool avoids the cost of calling fork when a client arrives. However, you have to estimate at startup how many children to create, and because they are full processes the resource cost is still high, so the achievable concurrency is limited.
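For reference, my_lock_wait/my_lock_release can be implemented several ways; UNP's file-locking version uses fcntl record locks on a temporary file, roughly as in the sketch below (simplified from memory, error handling omitted), so that only one child at a time sits in accept.

#include <fcntl.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

static struct flock lock_it, unlock_it;
static int lock_fd = -1;

void my_lock_init(char *pathname)
{
    char lock_file[128];

    strncpy(lock_file, pathname, sizeof(lock_file) - 1);
    lock_file[sizeof(lock_file) - 1] = '\0';
    lock_fd = mkstemp(lock_file);          /* create a unique lock file */
    unlink(lock_file);                     /* keep the descriptor, drop the name */

    lock_it.l_type = F_WRLCK;              /* a write lock ... */
    lock_it.l_whence = SEEK_SET;
    lock_it.l_start = 0;
    lock_it.l_len = 0;                     /* ... covering the whole file */

    unlock_it = lock_it;
    unlock_it.l_type = F_UNLCK;
}

void my_lock_wait(void)
{
    while (fcntl(lock_fd, F_SETLKW, &lock_it) < 0)
        ;                                  /* F_SETLKW blocks; retry if interrupted by a signal */
}

void my_lock_release(void)
{
    fcntl(lock_fd, F_SETLK, &unlock_it);
}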

One thread per client

for ( ; ; ) {
		clilen = addrlen;
		iptr = Malloc(sizeof(int)); /* allocate an int to hold the descriptor; a fresh one on every iteration */
		*iptr = Accept(listenfd, cliaddr, &clilen); /* accept a client; returns the connected socket */
		/* create a thread with default attributes to serve the client, passing it the connected socket */
		Pthread_create(&tid, NULL, &doit, (void *) iptr); 
	}

If the server host supports threads, we can switch from processes to threads. Compared with processes, threads save resources, which is enough for ordinary workloads. But if a web server faces tens of thousands of concurrent clients it may end up creating 10,000 threads at once; check whether your machine can actually withstand that (one knob that helps is sketched below).
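If you do push thread-per-client toward thousands of threads, the per-thread stack is usually the first limit you hit (the default is often 8 MB of virtual address space on Linux). One mitigation is to shrink it with a thread attribute; the sketch below is illustrative only (the 256 KB figure and the spawn_handler wrapper are not from the original code), while doit and iptr are the handler and argument from the listing above.

#include <pthread.h>

extern void *doit(void *);        /* the per-connection handler from the listing above */

int spawn_handler(int *iptr)
{
    pthread_t      tid;
    pthread_attr_t attr;
    int            ret;

    pthread_attr_init(&attr);
    pthread_attr_setstacksize(&attr, 256 * 1024);   /* 256 KB instead of the default stack */
    ret = pthread_create(&tid, &attr, doit, iptr);
    pthread_attr_destroy(&attr);
    return ret;
}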

One thread per client + a pre-created thread pool

for (i = 0; i < nthreads; i++) /* create the requested number of threads */
		thread_make(i);			/* only main thread returns */


void
thread_make(int i)
{
	void	*thread_main(void *);
	int     *val;
	val = Malloc(sizeof(int));
	*val = i;
	/* create the thread with default attributes; store its id in the thread structure and pass the index i as the argument */
	Pthread_create(&tptr[i].thread_tid, NULL, &thread_main, (void *) val); 
	return;		/* main thread returns */
}

void *
thread_main(void *arg)
{
	int				connfd;
	void			web_child(int);
	socklen_t		clilen;
	struct sockaddr	*cliaddr;
	int				idx = *((int *) arg);	/* save the thread index before freeing arg */

	cliaddr = Malloc(addrlen); /* allocate space for the client address structure */

	printf("thread %d starting\n", idx);
	free(arg);
	
	for ( ; ; ) {
		clilen = addrlen;
    	Pthread_mutex_lock(&mlock);
		connfd = Accept(listenfd, cliaddr, &clilen); /* accept under the protection of a mutex */
		Pthread_mutex_unlock(&mlock);
		tptr[idx].thread_count++; /* one more client handled by this thread */

		web_child(connfd);		/* process the request */
		Close(connfd); /* close the connected socket */
	}
}

The main thread does all the accepts

	/* create all the threads */
	for (i = 0; i < nthreads; i++) /* thread_make creates each thread and sets it up */
		thread_make(i);		/* only main thread returns */

	Signal(SIGINT, sig_int); /* catch Ctrl+C */

	for ( ; ; ) {
		clilen = addrlen;
		connfd = Accept(listenfd, cliaddr, &clilen); /* accept a client; returns the connected socket */

		Pthread_mutex_lock(&clifd_mutex); /* lock the shared array of pending descriptors */
		clifd[iput] = connfd; /* store the descriptor */
		if (++iput == MAXNCLI) /* wrap around when the put index reaches the end */
			iput = 0;

		/* if the put index catches up with the get index, the main thread has queued more
		   descriptors than the workers have taken (too many clients arriving at once): bail out */
		if (iput == iget)
			err_quit("iput = iget = %d", iput);
		Pthread_cond_signal(&clifd_cond); /* signal the condition variable */
		Pthread_mutex_unlock(&clifd_mutex); /* release the mutex */
	}

This pattern sidesteps any thread-safety concerns around accept. In practice, a single thread of control calling accept is enough.
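For completeness, here is the worker side of this model, a sketch reproduced from memory of UNP's version (so the details may differ slightly): each worker sleeps on the condition variable and takes the next descriptor out of the clifd[] ring that the main thread fills.

void *
thread_main(void *arg)
{
	int		connfd;
	void	web_child(int);

	printf("thread %d starting\n", *((int *) arg));
	for ( ; ; ) {
		Pthread_mutex_lock(&clifd_mutex);
		while (iget == iput)                        /* nothing queued: wait to be signaled */
			Pthread_cond_wait(&clifd_cond, &clifd_mutex);
		connfd = clifd[iget];                       /* take the next connected socket */
		if (++iget == MAXNCLI)
			iget = 0;                               /* wrap around, mirroring iput */
		Pthread_mutex_unlock(&clifd_mutex);

		web_child(connfd);                          /* serve the client */
		Close(connfd);
	}
}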

Event-driven (only the reactor pattern is discussed)

The Reactor design pattern handles service requests that are delivered concurrently to an application by one or more clients. Each service of the application may consist of several methods and is represented by a separate event handler, which dispatches the requests specific to that service. The event handlers are dispatched by an initiation dispatcher, which manages the registered handlers; demultiplexing of the incoming requests is done by a synchronous event demultiplexer (also called a dispatcher or notifier). On Linux, the core of all this is the OS's I/O-multiplexing interface (the epoll_* family of calls).

  1. The basic workflow:
  • The main thread registers read events for sockets in the epoll kernel event table.
  • The main thread calls epoll_wait and waits for data to become readable on some socket.
  • When a socket becomes readable, epoll_wait notifies the main thread, which puts the "socket readable" event into a request queue.
  • A worker thread sleeping on the request queue is woken up; it reads the data from the socket, processes the request, and then registers a write event for that socket in the epoll kernel event table.
  • The main thread calls epoll_wait and waits for the socket to become writable.
  • When the socket becomes writable, epoll_wait notifies the main thread, which puts the "socket writable" event into the request queue.
  • A worker thread sleeping on the request queue is woken up; it writes the result of handling the request back to the socket.
  2. Pros and cons
  • Pros

    • Fast response: the server is never blocked by a single synchronous operation;
    • Scalability: CPU resources can be exploited simply by adding reactor instances (as in multi-reactor designs);
    • Reusability: the reactor itself is independent of the concrete event-handling logic and is easy to reuse.
  • Cons

    • When one reactor is shared, a long-running read or write drags down that reactor's response time; in such cases consider thread-per-connection instead.

Single-reactor, single-threaded model

/*
 * epoll-based, nonblocking, event-driven server
 */
#include <stdio.h>
#include <sys/socket.h>
#include <sys/epoll.h>
#include <arpa/inet.h>
#include <fcntl.h>
#include <unistd.h>
#include <errno.h>
#include <string.h>
#include <stdlib.h>
#include <time.h>

#define MAX_EVENTS  1024                                    /* upper bound on monitored descriptors */
#define BUFLEN      4096
#define SERV_PORT   8080

struct myevent
{
	int fd;                                           /* descriptor to monitor */
	int events;                                       /* events to monitor for */
	void *arg;                                        /* generic argument */
	void (*callback)(int fd, int events, void *arg);  /* callback function */
	int status;                                       /* whether this fd is currently on the red-black tree */

	char buf[BUFLEN];
	int len;
	long last_active;                                 /* time the fd was last (re)added; used to kick idle connections */
};
int g_efd;                                                /* the epoll descriptor */
struct myevent g_events[MAX_EVENTS+1];                    /* the +1 slot is for the listening socket */


/* initialize (or reset) a myevent structure */
void eventset(struct myevent *ev, int fd, void (*callback)(int, int, void *), void *arg)
{
	ev->fd = fd;
	ev->callback = callback;
	ev->events = 0;
	ev->arg = arg;
	ev->status = 0;                                   /* not on the tree yet */
	memset(ev->buf, 0, sizeof(ev->buf));
	ev->len = 0;
	ev->last_active = time(NULL);
	return;
}
/* add an event to the epoll instance, or modify it if the fd is already registered
   (libevent does not use MOD here: it deletes the event and then re-adds it) */
void eventadd(int efd, int events, struct myevent *ev)
{
	struct epoll_event epv = {0, {0}};
	int op = 0;
	epv.data.ptr = ev;
	epv.events = ev->events = events;
	if (ev->status == 1)
	{
		op = EPOLL_CTL_MOD;                       /* already on the tree: modify */
	}
	else {
		op = EPOLL_CTL_ADD;                       /* not on the tree yet: add */
		ev->status = 1;
	}
	if (epoll_ctl(efd, op, ev->fd, &epv) < 0)
		printf("event add failed [fd=%d], events[%d]\n", ev->fd, events);
}
void  eventdel(int efd,struct myevent*ev)
{
	struct epoll_event epv={0,{0}};
	if(ev->status!=1) return;
	epv.data.ptr=ev;
	ev->status=0;
	epoll_ctl(efd,EPOLL_CTL_DEL,ev->fd,&epv);
}
void recvdata(int fd, int events, void *arg);         /* forward declaration: senddata re-arms the read callback */

/* write callback: send the buffered data back to the client */
void senddata(int fd, int events, void *arg)
{
	struct myevent *ev = (struct myevent *)arg;
	int len = 0;

	len = send(fd, ev->buf, ev->len, 0);              /* send the data buffered in recvdata */
	eventdel(g_efd, ev);                              /* take the fd off the red-black tree */

	if (len > 0)
	{
		eventset(ev, fd, recvdata, ev);               /* sent OK: go back to waiting for the next request */
		eventadd(g_efd, EPOLLIN, ev);
	}
	else {
		close(ev->fd);
		printf("send[fd=%d] error[%d]:%s\n", fd, errno, strerror(errno));
	}
}

/* read callback: read the client's request */
void recvdata(int fd, int events, void *arg)
{
	struct myevent *ev = (struct myevent *)arg;
	int len = 0;

	len = recv(fd, ev->buf, sizeof(ev->buf) - 1, 0);  /* read the data (leave room for the terminator) */
	eventdel(g_efd, ev);                              /* take the fd off the red-black tree */
	if (len > 0)
	{
		ev->len = len;
		ev->buf[len] = '\0';
		eventset(ev, fd, senddata, ev);               /* switch this fd's callback to senddata */
		eventadd(g_efd, EPOLLOUT, ev);                /* and wait until it becomes writable */
	}
	else if (len == 0) {
		close(ev->fd);
		/* ev - g_events gives the element's index by pointer subtraction */
		printf("[fd=%d] pos[%ld], closed\n", fd, ev - g_events);
	}
	else {
		close(ev->fd);
		printf("recv[fd=%d] error[%d]:%s\n", fd, errno, strerror(errno));
	}
}
/* accept callback for the listening socket */
void acceptconn(int fd, int events, void *arg)
{
	struct sockaddr_in cin;
	socklen_t len = sizeof(cin);
	int cfd, i;

	if ((cfd = accept(fd, (struct sockaddr *)&cin, &len)) == -1)
	{
		if (errno != EAGAIN && errno != EINTR)
			printf("accept error[%d]:%s\n", errno, strerror(errno));
		return;
	}
	do {
		/* look for a free slot in g_events for the new connection */
		for (i = 0; i < MAX_EVENTS; i++)
		{
			if (g_events[i].status == 0)              /* this slot is free */
				break;
		}
		if (i == MAX_EVENTS)                          /* no free slot: connection limit reached */
		{
			printf("max connect limit [%d]\n", MAX_EVENTS);
			break;
		}

		int flag = 0;
		if ((flag = fcntl(cfd, F_SETFL, O_NONBLOCK)) < 0) /* make the connected socket nonblocking */
		{
			printf("fcntl nonblocking failed [%d]\n", errno);
			break;
		}
		eventset(&g_events[i], cfd, recvdata, &g_events[i]);
		eventadd(g_efd, EPOLLIN, &g_events[i]);

	} while (0);                                      /* do { ... } while (0) used like a structured goto */
}

void initlistensocket(int efd, short port)
{
	struct sockaddr_in sin;

	int lfd = socket(AF_INET, SOCK_STREAM, 0);
	fcntl(lfd, F_SETFL, O_NONBLOCK);                  /* make the listening socket nonblocking */
	memset(&sin, 0, sizeof(sin));                     /* same as bzero(&sin, sizeof(sin)) */
	sin.sin_family = AF_INET;
	sin.sin_addr.s_addr = INADDR_ANY;
	sin.sin_port = htons(port);

	bind(lfd, (struct sockaddr *)&sin, sizeof(sin));

	listen(lfd, 20);
	/* reserve the last slot for lfd; note that the callback argument is the event structure itself */
	eventset(&g_events[MAX_EVENTS], lfd, acceptconn, &g_events[MAX_EVENTS]);
	eventadd(efd, EPOLLIN, &g_events[MAX_EVENTS]);
}

int main(int argc, char *argv[])
{
	unsigned short port = SERV_PORT;
	if (argc == 2)
	{
		port = atoi(argv[1]);
	}
	g_efd = epoll_create(MAX_EVENTS+1);               /* create the epoll instance (backed by a red-black tree); the return value is a descriptor referring to it */

	initlistensocket(g_efd, port);                    /* set up the listening socket */

	struct epoll_event events[MAX_EVENTS+1];

	int checkpos = 0, i;
	while (1)
	{
		/* a round of timeout checks first: test 100 connections per iteration (never listenfd);
		   a client that has not talked to the server for 60 seconds is closed */
		long now = time(NULL);
		for (i = 0; i < 100; i++, checkpos++)         /* handle 100 slots at a time */
		{
			if (checkpos == MAX_EVENTS)
			{
				checkpos = 0;
			}
			if (g_events[checkpos].status != 1)       /* not on the red-black tree */
			{
				continue;
			}
			long duration = now - g_events[checkpos].last_active; /* how long this client has been idle */
			if (duration >= 60)
			{
				close(g_events[checkpos].fd);
				eventdel(g_efd, &g_events[checkpos]); /* remove the idle client from g_efd */
			}
		}
		/* wait on the red-black tree; ready descriptors are copied into the events array;
		   returns 0 if nothing happens within 1 second */
		int nfd = epoll_wait(g_efd, events, MAX_EVENTS+1, 1000);
		if (nfd < 0) { break; }

		for (i = 0; i < nfd; i++)
		{
			struct myevent *ev = (struct myevent *)events[i].data.ptr;
			if (events[i].events & EPOLLIN && ev->events & EPOLLIN)
			{
				ev->callback(ev->fd, events[i].events, ev->arg); /* readable */
			}
			if (events[i].events & EPOLLOUT && ev->events & EPOLLOUT)
			{
				ev->callback(ev->fd, events[i].events, ev->arg); /* writable */
			}
		}
	}
	return 0;
}

Thanks to epoll's performance, this single-threaded reactor is enough for ordinary workloads. Redis itself uses exactly this kind of single-threaded reactor.

Single reactor + worker thread pool

In a single-threaded reactor the business logic also runs in the I/O thread, so any time-consuming operation hurts concurrency. The fix is to hand time-consuming work to a worker thread pool and run it asynchronously.
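A minimal sketch of that hand-off, assuming a hypothetical task queue (task, submit_task, worker and handle_request are illustrative names, not from any library): the reactor's read callback only does the recv and then queues the request; the workers never touch epoll, and the reply is typically sent by re-arming EPOLLOUT back in the reactor (for example after waking it through a pipe or eventfd).

#include <pthread.h>
#include <stdlib.h>
#include <string.h>

typedef struct task {
    int          fd;
    char         buf[4096];
    int          len;
    struct task *next;
} task;

static task *head, *tail;
static pthread_mutex_t qlock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  qcond = PTHREAD_COND_INITIALIZER;

/* called from the reactor's EPOLLIN callback right after recv() */
void submit_task(int fd, const char *data, int len)
{
    task *t = malloc(sizeof(task));
    t->fd  = fd;
    t->len = len < (int)sizeof(t->buf) ? len : (int)sizeof(t->buf);
    memcpy(t->buf, data, t->len);
    t->next = NULL;

    pthread_mutex_lock(&qlock);
    if (tail) tail->next = t; else head = t;
    tail = t;
    pthread_cond_signal(&qcond);                 /* wake one worker */
    pthread_mutex_unlock(&qlock);
}

/* each worker thread in the pool runs this loop */
void *worker(void *arg)
{
    (void)arg;
    for (;;) {
        pthread_mutex_lock(&qlock);
        while (head == NULL)
            pthread_cond_wait(&qcond, &qlock);   /* sleep until a task is queued */
        task *t = head;
        head = t->next;
        if (head == NULL) tail = NULL;
        pthread_mutex_unlock(&qlock);

        /* handle_request(t);  <- the slow business logic runs here, off the I/O thread */
        free(t);
    }
    return NULL;
}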

multi reactor-1

From the analysis above, the single-threaded reactor is simply too busy: it both accepts new clients (handles the listening socket) and serves existing ones (reads their requests and writes the replies on the connected sockets). So why not just split those two jobs apart? That way we should be able to handle more concurrent clients.

A simplified version looks like this:

// main thread
server_manager* server_manager_create(int port, int thread_num)
{
    pthread_spin_init(&lock, PTHREAD_PROCESS_PRIVATE);
    server_manager* manager = (server_manager*)malloc(sizeof(server_manager));
    if (manager == NULL)  {
		debug_ret("create server_manager failed, file: %s, line: %d", __FILE__, __LINE__);
		return NULL;
	}
	manager->listen_port = port;
    // this creates the main reactor
    manager->loop = event_loop_create();
    if (manager->loop == NULL)  {
        debug_ret("create epoller failed, file: %s, line: %d", __FILE__, __LINE__);
	 	mu_free(manager);
	 	return NULL;
    }

    signal(SIGPIPE, SIG_IGN);

    if (thread_num < 0) {
        thread_num = MAX_LOOP;
    }
    manager->loop_num = thread_num;
    pthread_t tid;
    long long i = 0;
    // these are the sub-reactors
	for (i = 0; i < thread_num; i++)  {
		pthread_create(&tid, NULL, spawn_thread, (void *)i);
	}

    while(true)  {       // wait until every event_loop has been created
        pthread_spin_lock(&lock);
        if (started_loop == thread_num)  {
            pthread_spin_unlock(&lock);
            break;
        }
        pthread_spin_unlock(&lock);
    }

    pthread_spin_destroy(&lock);
    manager->timer_manager = timer_manager_create();
	
	return manager;
}

The spawn_thread function:


void* spawn_thread(void *arg)
{
	int i = (long)arg;
	// each worker thread creates its own reactor (a sub-reactor) ...
	g_loops[i] = event_loop_create();
	pthread_spin_lock(&lock);
	started_loop++;
	pthread_spin_unlock(&lock);
	event_loop_run(g_loops[i]);    // ... and runs its event loop
	return NULL;
}

The event_loop_create function:


event_loop* event_loop_create()
{
    event_loop* loop = (event_loop*)malloc(sizeof(event_loop));
    if (loop == NULL)  {
        debug_ret("create event loop failed, file : %s, line : %d", __FILE__, __LINE__);
        return NULL;
    }
    // the key call: create the epoll instance for this loop
    loop->epoll_fd = epoller_create();
    if (loop->epoll_fd == -1)  {
        debug_ret("epooler_create failed, file : %s, line : %d", __FILE__, __LINE__);
        free(loop);
        return NULL;
    }

    return loop;
}

The main reactor blocks on the listening socket.


listener* listener_create(server_manager* manager, inet_address ls_addr,
                         message_callback_pt msg_cb, connection_callback_pt new_con_cb)
{
    listener* ls = (listener*)malloc(sizeof(listener));
    if (ls == NULL)  {
        debug_ret("create listener failed, file: %s, line: %d", __FILE__, __LINE__);
        return NULL;
    }

    ls->listen_addr = ls_addr;

    manager->msg_callback = msg_cb;
    manager->new_connection_callback = new_con_cb;

    int bOk = -1;
    event* lev = NULL;
    int listen_fd;
    do {
        listen_fd = socket(AF_INET, SOCK_STREAM | SOCK_NONBLOCK | SOCK_CLOEXEC, 0);     // create a nonblocking, close-on-exec socket
        if (listen_fd < 0)  {
            bOk = ERR_SOCKET;
            break;
        }

        int opt = 1;
        setsockopt(listen_fd, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof(opt));
        int ret = bind(listen_fd, (struct sockaddr *)&ls_addr.addr, sizeof(ls_addr.addr));
        if (ret < 0)  {
            bOk = ERR_BIND;
            break;
        }

        ret = listen(listen_fd, SOMAXCONN);
        if (ret < 0)  {
            bOk = ERR_LISTEN;
            break;
        }
        // the core call: wrap the listening fd in an event whose read callback accepts connections
        lev = event_create(listen_fd, EPOLLIN | EPOLLPRI,
                                    event_accept_callback, manager, NULL, NULL);       // trailing args: read/write callbacks and their arguments
        if (lev == NULL)  {
            bOk = ERR_EVENT;
            break;
        }

        bOk = 0;
    } while(0);

    if (bOk != 0)  {
        debug_ret("create listener failed, error code is %d, file: %s, line: %d", bOk, __FILE__, __LINE__);
        if (listen_fd > 0)  {
            close(listen_fd);
        }
        free(ls);
        return NULL;
    }  
    else  {
        event_add_io(manager->loop->epoll_fd, lev);     // register the listening event with the main reactor's epoll
    }

    return ls;
}

The main reactor blocks on the listening socket; when a new client arrives, it calls accept to obtain the connected socket and hands it to a sub-reactor according to some load-balancing policy.


static void event_accept_callback(int listenfd, event* ev, void* arg)
{
    server_manager *manager = (server_manager *)arg;
	inet_address client_addr;
	socklen_t clilen = sizeof(client_addr.addr);
	// key point 1: accept the connection here, in the main reactor
	int connfd = accept(listenfd, (struct sockaddr *)&client_addr.addr,	&clilen);
	if (connfd < 0)  {
		int save = errno;
		if (save == EAGAIN || save == ECONNABORTED || save == EINTR
			|| save == EPROTO || save == EPERM || save == EMFILE)
		{
			return;
		}
		else  {
			debug_sys("accept failed, file: %s, line: %d", __FILE__, __LINE__);
		}
	}

	
	//char buff[50];
	//printf("connection from %s, port %d\n",
	//		inet_ntop(AF_INET, &client_addr.addr.sin_addr, buff, sizeof(buff)),
	//		ntohs(client_addr.addr.sin_port));

	fcntl(connfd, F_SETFL, fcntl(connfd, F_GETFL) | O_NONBLOCK);

    static int i = 0;
	if (i >= manager->loop_num)
		i = 0;
// if no worker threads were started, fall back to the main loop; otherwise pick a sub-reactor round-robin
    event_loop* loop = NULL;
    if (manager->loop_num == 0)  {     
        loop = manager->loop;
    }
    else  {
        loop = g_loops[i++];
    }
	
	connection *conn = connection_create(loop, connfd, manager->msg_callback);      // the last argument is the user callback invoked when a message arrives
	if (conn == NULL)  {
		debug_quit("create connection failed, file: %s, line: %d", __FILE__, __LINE__);
	}
    conn->disconnected_cb = default_disconnected_callback;
	if (manager->new_connection_callback) {
        conn->connected_cb = manager->new_connection_callback;
        connection_established(conn);
    }

    connection_start(conn, loop);

}

Continuing from the above, let's look at the connection_create function.


connection* connection_create(event_loop* loop, int connfd, message_callback_pt msg_cb)
{
    connection* conn = (connection* )mu_malloc(sizeof(connection));
    if (conn == NULL)  {
        debug_ret("create connection failed, file: %s, line: %d", __FILE__, __LINE__);
        return NULL;
    }

    memset(conn, 0, sizeof(connection));
    conn->connfd = connfd;
    conn->message_callback = msg_cb;
    // note: this time the event wraps the connected socket, not the listening one
    event* ev = (event*)event_create(connfd,  EPOLLIN | EPOLLPRI, event_readable_callback, 
                            conn, event_writable_callback, conn);
    if (ev == NULL)  {
        debug_ret("create event failed, file: %s, line: %d", __FILE__, __LINE__);
        mu_free(conn);
        return NULL;
    }

    conn->conn_event = ev;
    
    return conn;    
}

One thing worth noting: whether this model also needs a worker thread pool for business logic depends on the scenario; it is not mandatory.

multi reactor-2

Looking back at the multi-reactor-1 version, all it really does is pull the handling of the listening socket out into a dedicated loop. Can we skip that separation and simply throw everything into a thread pool?


int main(){
	socklen_t			clilen, addrlen;
	struct sockaddr		cliaddr;
	TcpServer tcpServer;
	tcpServer.initServer(NULL, "9999", &addrlen);
	bool stopServer=false;

	threadpool thrpool(10);
	for (int i = 0; i < 5; i++)
	{
		// submit 5 long-running tasks: each one runs an event loop on a pool thread
		thrpool.commit([&]() {
			while (!stopServer)
			{
				// this is the reactor from the diagram above
				unique_ptr<DataNode> datas = tcpServer.readData();
				cout << "recv from " << datas->fd << " data:[" << datas->buffer->data() << "]" << endl;
				unique_ptr<DataNode> recvdata(new DataNode(datas->fd, unique_ptr<vector<char>>(new vector<char>{'p', 'o', 'n', 'g'})));
				tcpServer.writeData(std::move(recvdata));
			}
		});
	}
}


#include "Server.h"
#include "Logger.h"
#include "SocketsOp.h"
#include "Config.h"

TcpServer::TcpServer() 
{
	
}


TcpServer::~TcpServer()
{
}



int TcpServer::initServer(const char *host, const char *serv, socklen_t *addrlenp){
	int				listenfd, n;
	const int		on = 1;
	struct addrinfo	hints, *res, *ressave;

	bzero(&hints, sizeof(struct addrinfo)); // zero the hints structure
	hints.ai_flags = AI_PASSIVE; // we want a passive (listening) socket
	hints.ai_family = AF_UNSPEC; // IPv4 or IPv6
	hints.ai_socktype = SOCK_STREAM; // tcp

	if ((n = getaddrinfo(host, serv, &hints, &res)) != 0) // resolve host/service
		debug_quit("tcp_listen error for %s, %s: %s",
		host, serv, gai_strerror(n));
	ressave = res; // remember the head so the whole list can be freed later

	// try each returned address until one works
	do {
		// create a socket for this address
		listenfd = socket(res->ai_family, res->ai_socktype, res->ai_protocol);
		if (listenfd < 0)
			continue;		/* error, try next one   */

		setsockopt(listenfd, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on)); // allow quick restarts
		if (bind(listenfd, res->ai_addr, res->ai_addrlen)==0) // bind the address
			break;			/* success   */

		close(listenfd);	/* bind error, close and try next one   */
	} while ((res = res->ai_next) != NULL);
			break;			/* success   */

		close(listenfd);	/* bind error, close and try next one   */
	} while ((res = res->ai_next) != NULL);

	if (res == NULL)	/* errno from final socket() or bind()  */
		debug_sys("tcp_listen error for %s, %s", host, serv);

	listen(listenfd, LISTENQ);  
	if (addrlenp)  
		*addrlenp = res->ai_addrlen;	/* return size of protocol address */

	freeaddrinfo(ressave);  
	this->epollFd = epoll_create(1024);

	if (this->epollFd < 0){
		return -1;
	}
	int reuse = 1;
	Setsockopt(listenfd, SOL_SOCKET, SO_REUSEADDR, &reuse, sizeof(reuse));
	this->listenFd = listenfd;

	g_FdLists2.insert(std::pair<int, shared_ptr<Item>>(listenfd, make_shared<Item>()));
	g_FdLists2[listenfd]->epollFd=epollFd;
	this->events.resize(1024);
	addEvent(this->epollFd, this->listenFd, EPOLLIN);
	return 0;  
}

int TcpServer::writeData(unique_ptr<DataNode> data){

	if (g_FdLists2.count(data->fd) == 0){
		debug_ret("socket does not exist");
		return -1;
	}

	shared_ptr<Item> items = g_FdLists2[data->fd];

	// queue the buffer; the actual writev happens in readData's event loop once EPOLLOUT fires
	items->quickoutputbuffer.emplace_back(std::move(data->buffer));

	modifyEvent(this->epollFd, data->fd, EPOLLOUT);

	// cout<<"write data epoll fd= "<<this->epollFd<<";fd="<<data->fd<<endl;
	return 0;
}



unique_ptr<DataNode> TcpServer::readData(){
	while (true)
	{

		 
		// wait (indefinitely) for events on any monitored descriptor
		int numbers = epoll_wait(this->epollFd, &this->events[0], this->events.size(), -1);
	     
		for (int i = 0; i < numbers; i++){
			int sockfd = events[i].data.fd;
			
			if (sockfd == this->listenFd){
				struct sockaddr_in clientAddress;
				socklen_t clientLen = sizeof(clientAddress);
				int connFd = Accept(this->listenFd, (struct sockaddr*)&clientAddress, &clientLen);
				if (connFd < 0){
					continue;
				}
				cout<<"新连接 epoll fd= "<<this->epollFd<<";fd="<<connFd<<endl;
				 
				SetNonBlock(connFd);
				addEvent(epollFd, connFd, EPOLLIN|EPOLLRDHUP | EPOLLHUP | EPOLLERR);
				// 
				g_FdLists2.insert(std::pair<int, shared_ptr<Item>>(connFd, make_shared<Item>()));
				 g_FdLists2[connFd]->epollFd=epollFd;

			}
			else if (events[i].events&(EPOLLRDHUP | EPOLLHUP | EPOLLERR)){
				if (g_FdLists2.count(sockfd) > 0){
					g_FdLists2[sockfd]->quickoutputbuffer.clear();
					
					removeEvent(g_FdLists2[sockfd]->epollFd,sockfd,EPOLLIN);
					close(sockfd);
					g_FdLists2.erase(sockfd);
					debug_ret("EPOLLRDHUP | EPOLLHUP | EPOLLERR,close fd");

				}
			}
			// readable: data has arrived from the peer
			else if (events[i].events&EPOLLIN){
				 
				if (g_FdLists2.count(sockfd) <= 0){
					debug_sys("failed");
					continue;
				}
				unique_ptr<vector<char>> inputbuffer(new vector<char>(65535,0));
				struct iovec iovc[1];
				iovc[0].iov_base = &*inputbuffer->begin();
				iovc->iov_len = 65535;
				int ret = readv(sockfd, iovc, 1);
				if (ret < 0){
					if (errno != EWOULDBLOCK && errno != EAGAIN)
					{
						removeEvent(g_FdLists2[sockfd]->epollFd,sockfd,EPOLLIN);
						close(sockfd);
						g_FdLists2.erase(sockfd);
						
						debug_sys("readv failed,%d", errno);
					}
					continue;
				}
				else if (ret == 0){
					
					removeEvent(g_FdLists2[sockfd]->epollFd,sockfd,EPOLLIN);
					close(sockfd);
					g_FdLists2.erase(sockfd);
					debug_sys("对端关闭了套接字,%d", errno);
				}
				else{
					if (ret != 65535){
						inputbuffer->resize(ret);
					}

				}
				 

				unique_ptr<DataNode> datas(new DataNode(sockfd,std::move(inputbuffer)));
				 
				return std::move(datas);

			}
			// writable: flush the pending output buffers
			else if (events[i].events&EPOLLOUT){
				if (g_FdLists2.count(sockfd) <= 0){
					continue;
				}
				 
				if (!g_FdLists2[sockfd]->quickoutputbuffer.empty())
				{
					
					 
					int total = 0;
					int size = g_FdLists2[sockfd]->quickoutputbuffer.size();
					 
					// writev() accepts at most IOV_MAX buffers per call
					size = min(size, IOV_MAX);
					std::vector< struct iovec> iov(size);
					
					for (int i = 0; i < size; i++)
					{
						shared_ptr<std::vector<char>> item=g_FdLists2[sockfd]->quickoutputbuffer[i];
						iov[i].iov_base = &*item->begin();
						iov[i].iov_len = g_FdLists2[sockfd]->quickoutputbuffer[i]->size();
						total += iov[i].iov_len;
						 
					}
					int ret = writev(sockfd, &iov[0], size);
					cout<<"write data="<<ret<<endl;
					if (ret < 0){
						if (errno != EWOULDBLOCK && errno != EAGAIN){
							
							removeEvent(g_FdLists2[sockfd]->epollFd,sockfd,EPOLLOUT);
							Shutdown(sockfd,SHUT_WR);
							g_FdLists2.erase(sockfd);
							debug_sys("send data error");
						}
						else{
							// EWOULDBLOCK/EAGAIN: the kernel send buffer is full; retry on the next EPOLLOUT
						}

					}
					else if (ret == total){
						// all `size` buffers were written in full: remove exactly those buffers
						g_FdLists2[sockfd]->quickoutputbuffer.erase(g_FdLists2[sockfd]->quickoutputbuffer.begin(), g_FdLists2[sockfd]->quickoutputbuffer.begin() + size);
						// nothing left to send for now: stop watching EPOLLOUT and go back to EPOLLIN
						int mask=(events[i].events& (~EPOLLOUT));
						mask|=EPOLLIN;

						modifyEvent(g_FdLists2[sockfd]->epollFd,sockfd,mask);

						cout<<"all pending data written"<<endl;
					}
					else{
						// partial write: drop exactly the bytes that were sent, keep the rest queued
						deque<shared_ptr<std::vector<char>>>::iterator iter = g_FdLists2[sockfd]->quickoutputbuffer.begin();
						while (iter != g_FdLists2[sockfd]->quickoutputbuffer.end() && ret > 0)
						{
							int curSize = iter->get()->size();
							if (ret >= curSize){
								// this buffer went out completely: remove it
								iter = g_FdLists2[sockfd]->quickoutputbuffer.erase(iter);
								ret -= curSize;
							}
							else{
								// this buffer went out partially: trim the sent prefix and stop
								iter->get()->erase(iter->get()->begin(), iter->get()->begin() + ret);
								ret = 0;
							}
						}
						debug_ret("pending output buffers=%d", (int)g_FdLists2[sockfd]->quickoutputbuffer.size());
					}
				}
			}
		}
	}
	
}

This approach is very workable, and it is the one I personally recommend, not least because it is convenient. You may well question the thread safety of accept and of the epoll-related calls used this way, but the good news is that they are thread safe; the kernel's eventpoll structure carries its own locks:

struct eventpoll {
	...
	/* a spinlock */
	spinlock_t lock;
		
	/* a mutex */
	struct mutex mtx;

	/* List of ready file descriptors */
	struct list_head rdllist;

	/* RB tree root used to store monitored fd structs */
	struct rb_root_cached rbr;
	...
};

One thing to watch out for, though: when several threads block in epoll_wait on the same listening descriptor, you can run into the thundering-herd problem.
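One mitigation, on Linux 4.5 and later, is to register the shared listening descriptor with EPOLLEXCLUSIVE so the kernel wakes only one of the epoll_wait callers; another is to give each thread its own listening socket via SO_REUSEPORT. A sketch of the former, assuming epfd and listenfd already exist (add_listener_exclusive is just an illustrative wrapper name):

#include <sys/epoll.h>

int add_listener_exclusive(int epfd, int listenfd)
{
    struct epoll_event ev;

    ev.events  = EPOLLIN | EPOLLEXCLUSIVE;   /* wake at most one waiter per incoming event */
    ev.data.fd = listenfd;
    /* EPOLLEXCLUSIVE may only be used with EPOLL_CTL_ADD */
    return epoll_ctl(epfd, EPOLL_CTL_ADD, listenfd, &ev);
}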

multi reactor-3

Frankly this version is not very interesting, because its premise is that the main reactor (the thread handling the listening socket) cannot keep up. How much traffic would that take? Probably only something on the scale of Tomcat would ever need it, so no concrete implementation is given here.

Redis's network model

Redis uses a single-threaded reactor. A single instance can reach around 100,000 QPS in benchmarks, so do not look down on the single-threaded reactor. Choose your model carefully: more complex is not automatically better; what fits is what is best.

For a network library, three kinds of events matter: file events, time events (timers), and signals. In Redis, file events and time events are managed together through the I/O multiplexing layer, while signals are handled asynchronously through signal handlers.
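The unified loop lives in ae.c; roughly (simplified from memory, so treat it as a sketch rather than the exact 5.0 source), aeMain keeps calling aeProcessEvents, which waits on the multiplexer for file events, using the nearest timer to bound the wait, and then fires whatever is due:

void aeMain(aeEventLoop *eventLoop) {
    eventLoop->stop = 0;
    while (!eventLoop->stop) {
        if (eventLoop->beforesleep != NULL)
            eventLoop->beforesleep(eventLoop);
        /* processes both ready file events and due time events in one pass */
        aeProcessEvents(eventLoop, AE_ALL_EVENTS|AE_CALL_AFTER_SLEEP);
    }
}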

  • Time events: what Redis actually supports are periodic events; after a time event fires it is not removed but re-scheduled. Timers are managed as a linked list, and a new timer is inserted at the head.
 if (aeCreateTimeEvent(server.el, 1, serverCron, NULL, NULL) == AE_ERR) {
        serverPanic("Can't create event loop timers.");
        exit(1);
    }


Time events are processed as follows:


/* Process time events */
static int processTimeEvents(aeEventLoop *eventLoop) {
    int processed = 0;
    aeTimeEvent *te;
    long long maxId;
    time_t now = time(NULL);

    /* If the system clock is moved to the future, and then set back to the
     * right value, time events may be delayed in a random way. Often this
     * means that scheduled operations will not be performed soon enough.
     *
     * Here we try to detect system clock skews, and force all the time
     * events to be processed ASAP when this happens: the idea is that
     * processing events earlier is less dangerous than delaying them
     * indefinitely, and practice suggests it is. */
    // If the system clock is moved into the future and then set back, time events may be delayed randomly.
    // The policy here: running timers early is safer than delaying them indefinitely.
    if (now < eventLoop->lastTime) {
        te = eventLoop->timeEventHead;
        while(te) {
            te->when_sec = 0;
            te = te->next;
        }
    }
    // record the current time
    eventLoop->lastTime = now;

    te = eventLoop->timeEventHead;
    maxId = eventLoop->timeEventNextId-1;
    // walk the list of time events
    while(te) {
        long now_sec, now_ms;
        long long id;
        // events previously marked for deletion are physically removed here
        /* Remove events scheduled for deletion. */
        if (te->id == AE_DELETED_EVENT_ID) {
            aeTimeEvent *next = te->next;
            if (te->prev)
                te->prev->next = te->next;
            else
                eventLoop->timeEventHead = te->next;
            if (te->next)
                te->next->prev = te->prev;
            if (te->finalizerProc)
                te->finalizerProc(eventLoop, te->clientData);
            zfree(te);
            te = next;
            continue;
        }

        /* Make sure we don't process time events created by time events in
         * this iteration. Note that this check is currently useless: we always
         * add new timers on the head, however if we change the implementation
         * detail, this check may be useful again: we keep it here for future
         * defense. */
        if (te->id > maxId) {
            te = te->next;
            continue;
        }
        aeGetTime(&now_sec, &now_ms);
        if (now_sec > te->when_sec ||
            (now_sec == te->when_sec && now_ms >= te->when_ms))
        {
            int retval;

            id = te->id;
            // retval, the return value of timeProc, is the interval in milliseconds before this event fires again
            retval = te->timeProc(eventLoop, id, te->clientData);
            processed++;
            if (retval != AE_NOMORE) {
                aeAddMillisecondsToNow(retval,&te->when_sec,&te->when_ms);
            } else {
                // AE_NOMORE: the event should not run again, so mark it for deletion
                te->id = AE_DELETED_EVENT_ID;
            }
        }
        // move on to the next event
        te = te->next;
    }
    return processed;
}
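The link back to the I/O side is the timeout passed to epoll_wait: before blocking, ae.c scans the (unsorted) timer list for the nearest expiration and waits at most that long. Roughly, simplified from Redis's aeSearchNearestTimer (check the real ae.c for the exact code):

static aeTimeEvent *aeSearchNearestTimer(aeEventLoop *eventLoop)
{
    aeTimeEvent *te = eventLoop->timeEventHead;
    aeTimeEvent *nearest = NULL;

    while (te) {
        /* keep whichever event expires first */
        if (!nearest || te->when_sec < nearest->when_sec ||
            (te->when_sec == nearest->when_sec &&
             te->when_ms < nearest->when_ms))
            nearest = te;
        te = te->next;
    }
    return nearest;
}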

  • Signals: initServer registers the signal handler sigShutdownHandler, whose main job is to set shutdown_asap to 1 (if the flag is already 1, the process exits immediately); otherwise the actual cleanup is carried out later by prepareForShutdown, called from serverCron.

void setupSignalHandlers(void) {
    struct sigaction act;

    /* When the SA_SIGINFO flag is set in sa_flags then sa_sigaction is used.
     * Otherwise, sa_handler is used. */
    sigemptyset(&act.sa_mask);
    act.sa_flags = 0;
    act.sa_handler = sigShutdownHandler;
    sigaction(SIGTERM, &act, NULL);
    sigaction(SIGINT, &act, NULL);
    return;
}

The sigShutdownHandler function:


static void sigShutdownHandler(int sig) {
    char *msg;

    switch (sig) {
    case SIGINT:
        msg = "Received SIGINT scheduling shutdown...";
        break;
    case SIGTERM:
        msg = "Received SIGTERM scheduling shutdown...";
        break;
    default:
        msg = "Received shutdown signal, scheduling shutdown...";
    };

    /* SIGINT is often delivered via Ctrl+C in an interactive session.
     * If we receive the signal the second time, we interpret this as
     * the user really wanting to quit ASAP without waiting to persist
     * on disk. */
    if (server.shutdown_asap && sig == SIGINT) {
        serverLogFromHandler(LL_WARNING, "You insist... exiting now.");
        rdbRemoveTempFile(getpid());
        exit(1); /* Exit with an error since this was not a clean shutdown. */
    } else if (server.loading) {
        exit(0);
    }

    serverLogFromHandler(LL_WARNING, msg);
    server.shutdown_asap = 1;
}
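The other half of the handshake sits in serverCron: on a later cron tick the flag is noticed and the real shutdown work runs outside signal-handler context. Roughly (a fragment simplified from memory of Redis 5.0's server.c, so the exact wording may differ):

    if (server.shutdown_asap) {
        if (prepareForShutdown(SHUTDOWN_NOFLAGS) == C_OK) exit(0);
        serverLog(LL_WARNING,
            "SIGTERM received but errors trying to shut down the server, "
            "check the logs for more information");
        server.shutdown_asap = 0;
    }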

References

Unix Network Programming, Volume 1 (《Unix网络编程:卷1》)

Linux High-Performance Server Programming (《Linux高性能服务器编程》)

《Scalable IO in Java》

Personal WeChat official account

You are welcome to follow and contribute to the official account coderFan.