SCM_RIGHTS 连接迁移
SCM_RIGHTS 是一种通过 Unix Socket 在本地进程之间传递文件描述符的机制,可以把一个已建立的连接从进程A传递到进程B。它可以用于功能的灰度发布,也可以在进程重启时保证长连接不中断。比如Cloudflare在早期支持TLS1.3的时候,就利用SCM_RIGHTS实现了灰度的能力:处理TLS1.3的进程可以经常变更,而不会影响非TLS1.3的请求,见blog.cloudflare.com/know-your-s… 。MOSN也利用SCM_RIGHTS实现了连接的迁移,见mosn.io/docs/produc… 。 需要注意的是,SCM_RIGHTS需要利用sendmsg/recvmsg系统调用在Unix Socket上传递文件描述符,不能通过TCP传递,所以不能实现跨Host的连接迁移。
Demo
本文提供一个简单的例子,其中有三个对象:客户端、进程A和进程B。具体的流程是:
- 客户端TCP连接上进程A
- 客户端发送一个message给进程A,进程A读取后,加上前缀后返回给客户端
- 进程A把已经建立的TCP连接通过sendmsg发送给进程B
- 进程B使用recvmsg读取到TCP连接的fd后,继续处理客户端发送的请求
效果
进程A源码
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <sys/un.h>
#define MY_SOCK_PATH "/tmp/mysocket"
#define LISTEN_BACKLOG 50
/*
 * Pass an open file descriptor to the peer of a UNIX domain socket.
 *
 * socket     - UNIX domain socket to send on; no destination address is
 *              supplied, so it must already have a peer (e.g. via connect()).
 * fd_to_send - descriptor to hand over. It is not closed here; the caller
 *              still owns its copy.
 *
 * The descriptor is carried as a single SOL_SOCKET/SCM_RIGHTS control
 * message attached to a one-byte payload. If sendmsg() fails, the error is
 * printed and the process exits with status 1.
 */
void send_fd(int socket, int fd_to_send) {
    /* The union forces the control buffer to be suitably aligned for
     * struct cmsghdr; a bare char array carries no such guarantee. */
    union {
        char buf[CMSG_SPACE(sizeof(int))];
        struct cmsghdr align;
    } ctrl;
    char data[1];
    struct iovec iov;
    struct msghdr message;
    struct cmsghdr *control_message;

    memset(&message, 0, sizeof(message));
    memset(&ctrl, 0, sizeof(ctrl));

    /* We are passing at least one byte of data so that recvmsg() will not return 0 */
    data[0] = ' ';
    iov.iov_base = data;
    iov.iov_len = sizeof(data);

    message.msg_name = NULL;
    message.msg_namelen = 0;
    message.msg_iov = &iov;
    message.msg_iovlen = 1;
    message.msg_control = ctrl.buf;
    message.msg_controllen = sizeof(ctrl.buf);

    control_message = CMSG_FIRSTHDR(&message);
    control_message->cmsg_level = SOL_SOCKET;
    control_message->cmsg_type = SCM_RIGHTS;
    control_message->cmsg_len = CMSG_LEN(sizeof(int));
    /* Copy instead of storing through an int pointer: the data area returned
     * by CMSG_DATA() is not guaranteed to be aligned for int. */
    memcpy(CMSG_DATA(control_message), &fd_to_send, sizeof(fd_to_send));

    if (sendmsg(socket, &message, 0) < 0) {
        perror("Failed to send message");
        exit(1);
    }
}
/*
 * Process A: listen on TCP port 12345, accept one client, answer its first
 * message with a "Process A: " prefix, then pass the connected socket to
 * whoever is bound to the UNIX domain datagram socket at MY_SOCK_PATH.
 *
 * Returns 0 after the descriptor has been handed over. Any failing socket,
 * read or write call prints a diagnostic and exits with EXIT_FAILURE.
 */
int main(void) {
    int sfd, cfd, ufd;
    struct sockaddr_in my_addr, peer_addr;
    socklen_t peer_addr_size;
    struct sockaddr_un un_addr;
    char buffer[1024];
    /* Sized for the prefix, a completely full buffer and the terminating
     * NUL, so snprintf() cannot truncate and its return value can never
     * exceed the array that write() later reads from. */
    char response[sizeof(buffer) + sizeof("Process A: ")];
    ssize_t numRead;

    sfd = socket(AF_INET, SOCK_STREAM, 0);
    if (sfd == -1) {
        perror("socket");
        exit(EXIT_FAILURE);
    }

    memset(&my_addr, 0, sizeof(struct sockaddr_in));
    my_addr.sin_family = AF_INET;
    my_addr.sin_addr.s_addr = INADDR_ANY;
    my_addr.sin_port = htons(12345);
    if (bind(sfd, (struct sockaddr *) &my_addr, sizeof(struct sockaddr_in)) == -1) {
        perror("bind");
        exit(EXIT_FAILURE);
    }
    if (listen(sfd, LISTEN_BACKLOG) == -1) {
        perror("listen");
        exit(EXIT_FAILURE);
    }

    /* Accept actual connection from the client */
    peer_addr_size = sizeof(struct sockaddr_in);
    cfd = accept(sfd, (struct sockaddr *) &peer_addr, &peer_addr_size);
    if (cfd == -1) {
        perror("accept");
        exit(EXIT_FAILURE);
    }

    /* Handle the first message from the client. A return of 0 (peer closed
     * without sending) is not an error: the socket is still handed over. */
    numRead = read(cfd, buffer, sizeof(buffer));
    if (numRead > 0) {
        printf("Process A received message: %.*s\n", (int)numRead, buffer);

        /* Send a response with a prefix to the client */
        int len = snprintf(response, sizeof(response), "Process A: %.*s", (int)numRead, buffer);
        if (len < 0 || (size_t)len >= sizeof(response)) {
            /* Defensive: never pass a bogus length to write(). */
            fprintf(stderr, "snprintf: failed to format response\n");
            close(cfd);
            close(sfd);
            exit(EXIT_FAILURE);
        }
        if (write(cfd, response, len) != len) {
            perror("partial/failed write");
            close(cfd);
            close(sfd);
            exit(EXIT_FAILURE);
        }
    } else if (numRead == -1) {
        perror("read");
        close(cfd);
        close(sfd);
        exit(EXIT_FAILURE);
    }

    /* Now we create a UNIX domain socket to send the descriptor to process B */
    ufd = socket(AF_UNIX, SOCK_DGRAM, 0);
    if (ufd == -1) {
        perror("socket");
        exit(EXIT_FAILURE);
    }
    memset(&un_addr, 0, sizeof(struct sockaddr_un));
    un_addr.sun_family = AF_UNIX;
    /* The struct was zeroed above and at most size-1 bytes are copied, so
     * sun_path is always NUL-terminated. */
    strncpy(un_addr.sun_path, MY_SOCK_PATH, sizeof(un_addr.sun_path) - 1);
    if (connect(ufd, (struct sockaddr *) &un_addr, sizeof(struct sockaddr_un)) == -1) {
        perror("connect");
        exit(EXIT_FAILURE);
    }

    /* Send the descriptor */
    send_fd(ufd, cfd);

    /* Close our copies of the connected socket, the listening socket and
     * the UNIX domain socket used for the hand-over. */
    close(cfd);
    close(sfd);
    close(ufd);
    return 0;
}
进程B源码
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/socket.h>
#include <sys/un.h>
#include <errno.h>
#define MY_SOCK_PATH "/tmp/mysocket"
#define BUFFER_SIZE 1024
/*
 * Receive a file descriptor that was sent as SCM_RIGHTS ancillary data.
 *
 * socket - UNIX domain socket to receive on.
 *
 * Returns the received descriptor, or -1 when recvmsg() fails or returns
 * no data, or when the message does not carry a SOL_SOCKET/SCM_RIGHTS
 * control message large enough to hold one descriptor.
 */
int recv_fd(int socket) {
    /* The union forces the control buffer to be suitably aligned for
     * struct cmsghdr; a bare char array carries no such guarantee. */
    union {
        char buf[CMSG_SPACE(sizeof(int))];
        struct cmsghdr align;
    } ctrl;
    char data[1];
    struct iovec iov;
    struct msghdr message;
    struct cmsghdr *control_message;
    int received_fd = -1;

    memset(&message, 0, sizeof(message));
    memset(&ctrl, 0, sizeof(ctrl));

    iov.iov_base = data;
    iov.iov_len = sizeof(data);

    message.msg_name = NULL;
    message.msg_namelen = 0;
    message.msg_iov = &iov;
    message.msg_iovlen = 1;
    message.msg_control = ctrl.buf;
    message.msg_controllen = sizeof(ctrl.buf);

    if (recvmsg(socket, &message, 0) <= 0) {
        return -1;
    }

    /* CMSG_FIRSTHDR() yields NULL when the message arrived without any
     * ancillary data, so it must be checked before being dereferenced. */
    control_message = CMSG_FIRSTHDR(&message);
    if (control_message == NULL) {
        return -1;
    }

    if (control_message->cmsg_level == SOL_SOCKET &&
        control_message->cmsg_type == SCM_RIGHTS &&
        control_message->cmsg_len >= CMSG_LEN(sizeof(int))) {
        /* Copy instead of reading through an int pointer: the data area
         * returned by CMSG_DATA() is not guaranteed to be aligned for int. */
        memcpy(&received_fd, CMSG_DATA(control_message), sizeof(received_fd));
    }

    return received_fd;
}
/*
 * Process B: bind a UNIX domain datagram socket at MY_SOCK_PATH, wait for a
 * file descriptor delivered through recv_fd(), then serve that descriptor:
 * every chunk read from it is echoed back with a "Process B: " prefix until
 * read() reports end-of-file or an error.
 *
 * Returns 0 after a clean end-of-file, EXIT_FAILURE otherwise. Once bind()
 * has succeeded, every exit path closes the sockets and removes the socket
 * file.
 */
int main(void) {
    int ufd;
    int fd = -1;
    int status = EXIT_FAILURE;
    struct sockaddr_un un_addr;
    char buffer[BUFFER_SIZE];
    /* Sized for the prefix, a completely full buffer and the terminating
     * NUL, so snprintf() cannot truncate and its return value can never
     * exceed the array that write() later reads from. */
    char response[BUFFER_SIZE + sizeof("Process B: ")];
    ssize_t numRead;

    ufd = socket(AF_UNIX, SOCK_DGRAM, 0);
    if (ufd == -1) {
        perror("socket");
        exit(EXIT_FAILURE);
    }

    memset(&un_addr, 0, sizeof(struct sockaddr_un));
    un_addr.sun_family = AF_UNIX;
    /* The struct was zeroed above and at most size-1 bytes are copied, so
     * sun_path is always NUL-terminated. */
    strncpy(un_addr.sun_path, MY_SOCK_PATH, sizeof(un_addr.sun_path) - 1);

    /* Remove a socket file left behind by an earlier run that did not exit
     * cleanly; bind() fails if the path already exists. The result is
     * ignored because the file normally does not exist. */
    unlink(MY_SOCK_PATH);
    if (bind(ufd, (struct sockaddr *) &un_addr, sizeof(struct sockaddr_un)) == -1) {
        perror("bind");
        close(ufd);
        exit(EXIT_FAILURE);
    }

    /* Receive the file descriptor from process A */
    fd = recv_fd(ufd);
    if (fd == -1) {
        perror("recv_fd");
        goto out;
    }

    /* Now we can use the received file descriptor to read data sent by the client */
    while ((numRead = read(fd, buffer, BUFFER_SIZE)) > 0) {
        printf("Process B received message: %.*s\n", (int)numRead, buffer);

        /* Send a response with a prefix to the client */
        int len = snprintf(response, sizeof(response), "Process B: %.*s", (int)numRead, buffer);
        if (len < 0 || (size_t)len >= sizeof(response)) {
            /* Defensive: never pass a bogus length to write(). */
            fprintf(stderr, "snprintf: failed to format response\n");
            goto out;
        }
        if (write(fd, response, len) != len) {
            perror("partial/failed write");
            goto out;
        }
    }
    if (numRead == -1) {
        perror("read");
        goto out;
    }
    status = 0;

out:
    /* Single cleanup path so the socket file never outlives the process
     * on an error exit. */
    if (fd != -1) {
        close(fd);
    }
    close(ufd);
    unlink(MY_SOCK_PATH);
    return status;
}