浅析tcpdump和libpcap抓包机制(上)——用户态动作

929 阅读4分钟

libpcap是网络数据包捕获函数包,而tcpdump是使用libpcap的经典软件,也是我们日常分析网络情况的常用工具。我们这次就主要来看看tcpdump是如何利用libpcap获取到网络包的。

我们就以tcpdump最简单的使用入手,监听本地回环的虚拟网卡,不添加条件过滤:

tcpdump -i lo

整个过程主要分两步,1、开启网卡监听,初始化监听条件;2、获取网络包,并显示出来。而这两步又会同时涉及到用户态和内核态的动作。本文仅说明用户态的动作,下篇来说明内核态的动作。

初始化和打开网卡

用户态请求监听网卡动作

1、pcap结构初始化

tcpdump通过调用libpcap的pcap_t * pcap_create(const char *device, char *errbuf)函数来打开网卡设备。在这个动作中,pcap_t * pcap_create_interface(const char *device, char *ebuf)函数以及pcap_t这个结构体比较重要。pcap_t这个结构体包含了网卡基本的信息,以及一些重要的方法,其中最重要的是还包含了该平台的一些重要socket数据,这里以pcap_linux为例。pcap_create_interface函数则根据平台对pcap_t结构体进行初始化。

tatic pcap_t *
open_interface(const char *device, netdissect_options *ndo, char *ebuf)
{
	pcap_t *pc;
        ......
        #ifdef HAVE_PCAP_CREATE
        //创建pcap结构体
	pc = pcap_create(device, ebuf);
	if (pc == NULL) {
		if (strstr(ebuf, "No such device") != NULL)
			return (NULL);
		error("%s", ebuf);
	}
        
        ......
        status = pcap_activate(pc);
        ......
}
......
/*
 * Linux-specific private state of a capture handle. The functions in this
 * walkthrough obtain it from the pcap_t's priv pointer (handle->priv).
 */
struct pcap_linux {
	long long sysfs_dropped; /* packets reported dropped by /sys/class/net/{if_name}/statistics/rx_{missed,fifo}_errors */
	struct pcap_stat stat;

	char	*device;	/* device name */
	int	filter_in_userland; /* must filter in userland */
	int	blocks_to_filter_in_userland;
	int	must_do_on_close; /* stuff we must do when we close */
	int	timeout;	/* timeout for buffering */
	int	cooked;		/* using SOCK_DGRAM rather than SOCK_RAW */
	int	ifindex;	/* interface index of the device we're bound to */
	int	lo_ifindex;	/* interface index of the loopback device */
	int	netdown;	/* we got an ENETDOWN and haven't resolved it */
	bpf_u_int32 oldmode;	/* mode to restore when turning monitor mode off */
	char	*mondevice;	/* mac80211 monitor device we created */
	u_char	*mmapbuf;	/* pointer to the memory-mapped region */
	size_t	mmapbuflen;	/* size of region */
	int	vlan_offset;	/* offset at which to insert vlan tags; if -1, don't insert */
	u_int	tp_version;	/* version of tpacket_hdr for mmaped ring */
	u_int	tp_hdrlen;	/* hdrlen of tpacket_hdr for mmaped ring */
	u_char	*oneshot_buffer; /* buffer for copy of packet */
	int	poll_timeout;	/* timeout to use in poll() */
#ifdef HAVE_TPACKET3
	unsigned char *current_packet; /* Current packet within the TPACKET_V3 block. Move to next block if NULL. */
	int packets_left; /* Unhandled packets left within the block from previous call to pcap_read_linux_mmap_v3 in case of TPACKET_V3. */
#endif
	int poll_breakloop_fd; /* fd to an eventfd to break from blocking operations */
};

2、打开网卡动作

status = pcap_activate(pc);会调用activate_op函数,这是个随平台变化的函数, 在初始化的时候被指定,handle->activate_op = pcap_activate_linux;

ret = setup_socket(handle, is_any_device);新建PF_PACKET套接字;

/*
 * Get a handle for capturing network packets from the given device.
 *
 * Excerpt ("......" marks code the article elided). In the lines shown,
 * it first calls setup_socket() and then setup_mmapped(), passing the
 * address of the local `status` to the latter.
 */
static int
pcap_activate_linux(pcap_t *handle)
{
	struct pcap_linux *handlep = handle->priv;
	const char	*device;
	int		is_any_device;
	struct ifreq	ifr;
	int		status = 0;
	int		status2 = 0;
	int		ret;
        ......
        /* Create the capture socket for this handle. */
        ret = setup_socket(handle, is_any_device);
        ......
        // Set up memory-mapped access.
	ret = setup_mmapped(handle, &status);
	
}

/*
 * Excerpt ("......" marks code the article elided): open a PF_PACKET
 * socket and bind it to the capture device.
 *
 * handle        - capture handle; its priv pointer is the pcap_linux state
 * is_any_device - nonzero selects a SOCK_DGRAM socket, zero a SOCK_RAW one
 *
 * In the lines shown, a nonzero result from iface_bind() closes the
 * socket and is returned to the caller unchanged.
 */
static int
setup_socket(pcap_t *handle, int is_any_device)
{
	struct pcap_linux *handlep = handle->priv;
	const char		*device = handle->opt.device;
	int			status = 0;
	int			sock_fd, arptype;
	int			val;
	int			err = 0;
	struct packet_mreq	mr;
        ......
        // In this walkthrough an interface was specified (-i lo), so SOCK_RAW is used.
        sock_fd = is_any_device ?
		socket(PF_PACKET, SOCK_DGRAM, 0) :
		socket(PF_PACKET, SOCK_RAW, 0);
        ......
        // Bind the socket associated with the fd to the given device.
        if ((err = iface_bind(sock_fd, handlep->ifindex,
		    handle->errbuf, 0)) != 0) {
			close(sock_fd);
			return err;
		}
       ......
       
}

这里需要提一下setup_mmapped(handle, &status)这个函数调用,它是设置内存映射的手段。其中最重要的一步是通过setsockopt(handle->fd, SOL_PACKET, PACKET_RX_RING,(void *) &req, sizeof(req))设置socket参数,让内核分配物理内存;随后调用handlep->mmapbuf = mmap(0, handlep->mmapbuflen, PROT_READ | PROT_WRITE, flags, handle->fd, 0);,这样就可以将内核分配的物理内存映射到用户空间的地址,即handlep->mmapbuf。(这两步实际位于setup_mmapped所调用的create_ring函数中。)

也就是说,先在内核空间中分配一块内核缓冲区,然后用户空间程序调用mmap将其映射到用户空间;内核将接收到的skb拷贝到那块内核缓冲区中,这样用户空间的程序就可以直接读到捕获的数据包了。

/*
 * Excerpt ("......" marks code the article elided): set up the
 * memory-mapped ring for a capture handle by calling create_ring().
 *
 * handle - capture handle; its priv pointer is the pcap_linux state
 * status - passed through to create_ring()
 *
 * Returns -1 if create_ring() returns -1, otherwise 1.
 */
static int
setup_mmapped(pcap_t *handle, int *status)
{
	struct pcap_linux *handlep = handle->priv;
	int ret, flags = MAP_ANONYMOUS | MAP_PRIVATE;

......
	ret = create_ring(handle, status);
	if (ret == -1) {
		/*
		 * Ring creation failed: release the one-shot buffer
		 * (presumably mapped in the elided code above -- confirm)
		 * and clear the pointer before reporting the error.
		 */
		munmap(handlep->oneshot_buffer, handle->snapshot);
		handlep->oneshot_buffer = NULL;
		return -1;
	}
	set_poll_timeout(handlep);

	return 1;
}
/*
 * Excerpt ("......" marks code the article elided; `req` and the value of
 * frames_per_block come from the elided part).
 *
 * The loop shown walks req.tp_block_nr blocks of req.tp_block_size bytes
 * starting at handlep->mmapbuf, and within each block steps through
 * frames_per_block frames of req.tp_frame_size bytes, recording each
 * frame's address through RING_GET_CURRENT_FRAME() while advancing
 * handle->offset by one per frame.
 */
static int
create_ring(pcap_t *handle, int *status)
{
	struct pcap_linux *handlep = handle->priv;
	unsigned i, j, frames_per_block;
	int flags = MAP_SHARED;
        ......
        handle->offset = 0;
        // Point the outer pcap_t's buffer at the mmapbuf held in pcap_linux.
	for (i=0; i<req.tp_block_nr; ++i) {
		u_char *base = &handlep->mmapbuf[i*req.tp_block_size];
		for (j=0; j<frames_per_block; ++j, ++handle->offset) {
			RING_GET_CURRENT_FRAME(handle) = base;
			base += req.tp_frame_size;
		}
	}
        .......
}

到这里我们知道,所谓的“打开网卡”的动作,本质上是建立一个PF_PACKET的socket,并绑定到要监听的网卡设备上,同时和内核空间的网络模块建立内存映射。那接下来的包读取就不难猜测,从映射的内存中不断获取包。

网络包的读取

main函数调用pcap_loop,其中有个循环在不停读取网络包。读取函数p->read_op在初始化的时候被设置为pcap_read_linux_mmap_v3:

/*
 * Repeatedly read packets from `p` and hand them to `callback` until the
 * requested count is used up, the source is exhausted, or a read fails.
 *
 * p        - capture handle; p->rfile != NULL selects the offline reader
 * cnt      - number of packets to process, or a value for which
 *            PACKET_COUNT_IS_UNLIMITED() is true to keep going
 * callback - invoked by the read routines for each packet
 * user     - opaque pointer forwarded to the read routines
 *
 * Returns the read routine's result when it is <= 0, or 0 once `cnt`
 * packets have been consumed.
 */
int
pcap_loop(pcap_t *p, int cnt, pcap_handler callback, u_char *user)
{
	while (1) {
		int nread;

		if (p->rfile == NULL) {
			/*
			 * Live capture: keep calling read_op until it
			 * delivers something or reports an error.
			 */
			do {
				nread = p->read_op(p, cnt, callback, user);
			} while (nread == 0);
		} else {
			/*
			 * Offline read: 0 means EOF, so it is called only
			 * once per iteration and a 0 ends the loop below.
			 */
			nread = pcap_offline_read(p, cnt, callback, user);
		}

		if (nread <= 0)
			return (nread);

		/* With an unlimited count there is nothing to track. */
		if (PACKET_COUNT_IS_UNLIMITED(cnt))
			continue;

		cnt -= nread;
		if (cnt <= 0)
			return (0);
	}
}

/*
 * Excerpt ("......" marks code the article elided; `packets_to_read` is
 * declared in the elided part).
 *
 * The loop shown handles up to packets_to_read packets, stopping early
 * when handle->break_loop is set. Each packet at handlep->current_packet
 * is read as a struct tpacket3_hdr and passed to
 * pcap_handle_packet_mmap() together with `callback` and `user`.
 *
 * In the lines shown, a negative result from pcap_handle_packet_mmap()
 * resets handlep->current_packet to NULL and is returned immediately.
 */
static int
pcap_read_linux_mmap_v3(pcap_t *handle, int max_packets, pcap_handler callback,
		u_char *user)
{
	struct pcap_linux *handlep = handle->priv;
	union thdr h;
	int pkts = 0;
	int ret;
        ......
        // Read the packets in the memory mapping and run the callback on each.
        while (packets_to_read-- && !handle->break_loop) {
			struct tpacket3_hdr* tp3_hdr = (struct tpacket3_hdr*) handlep->current_packet;
			/*
			 * The sub-second timestamp is passed as-is for
			 * nanosecond precision, otherwise divided by 1000.
			 */
			ret = pcap_handle_packet_mmap(
					handle,
					callback,
					user,
					handlep->current_packet,
					tp3_hdr->tp_len,
					tp3_hdr->tp_mac,
					tp3_hdr->tp_snaplen,
					tp3_hdr->tp_sec,
					handle->opt.tstamp_precision == PCAP_TSTAMP_PRECISION_NANO ? tp3_hdr->tp_nsec : tp3_hdr->tp_nsec / 1000,
					VLAN_VALID(tp3_hdr, &tp3_hdr->hv1),
					tp3_hdr->hv1.tp_vlan_tci,
					VLAN_TPID(tp3_hdr, &tp3_hdr->hv1));
			if (ret == 1) {
				/* Only a result of 1 is counted in pkts. */
				pkts++;
			} else if (ret < 0) {
				handlep->current_packet = NULL;
				return ret;
			}
			/* Step to the next packet using the offset stored in this header. */
			handlep->current_packet += tp3_hdr->tp_next_offset;
			handlep->packets_left--;
		}
        ......

}

在处理中调用callback函数,该函数在main中传入print_packet,对传入的pcap_pkthdr的包信息进行解析并打印到控制台。

至此,用户态的动作解说完毕。