libpcap是网络数据包捕获函数库,而tcpdump是使用libpcap的经典软件,也是我们日常分析网络情况的常用工具。我们这次就主要来看看tcpdump是如何利用libpcap获取到网络包的。
我们就以tcpdump最简单的使用入手,监听本地回环的虚拟网卡,不添加条件过滤:
tcpdump -i lo
整个过程主要分两步,1、开启网卡监听,初始化监听条件;2、获取网络包,并显示出来。而这两步又会同时涉及到用户态和内核态的动作。本文仅说明用户态的动作,下篇来说明内核态的动作。
初始化和打开网卡
用户态请求监听网卡动作
1、pcap结构初始化
tcpdump通过调用libpcap的pcap_t * pcap_create(const char *device, char *errbuf)函数来创建用于打开网卡设备的pcap句柄。在这个动作中,pcap_t * pcap_create_interface(const char *device, char *ebuf)函数以及pcap_t这个结构体比较重要。pcap_t结构体包含了网卡的基本信息以及一些重要的方法(函数指针),其中最重要的是它还通过priv字段保存了与平台相关的私有数据(例如socket相关的信息),这里以Linux平台的pcap_linux为例。pcap_create_interface函数则根据平台对pcap_t结构体进行初始化。
tatic pcap_t *
open_interface(const char *device, netdissect_options *ndo, char *ebuf)
{
pcap_t *pc;
......
#ifdef HAVE_PCAP_CREATE
//创建pcap结构体
pc = pcap_create(device, ebuf);
if (pc == NULL) {
if (strstr(ebuf, "No such device") != NULL)
return (NULL);
error("%s", ebuf);
}
......
status = pcap_activate(pc);
......
}
......
/*
 * Linux-specific private state of a capture handle. The functions in this
 * article obtain it from the pcap_t via `handle->priv`.
 */
struct pcap_linux {
long long sysfs_dropped; /* packets reported dropped by /sys/class/net/{if_name}/statistics/rx_{missed,fifo}_errors */
struct pcap_stat stat;
char *device; /* device name */
int filter_in_userland; /* must filter in userland */
int blocks_to_filter_in_userland;
int must_do_on_close; /* stuff we must do when we close */
int timeout; /* timeout for buffering */
int cooked; /* using SOCK_DGRAM rather than SOCK_RAW */
int ifindex; /* interface index of the device we're bound to */
int lo_ifindex; /* interface index of the loopback device */
int netdown; /* we got an ENETDOWN and haven't resolved it */
bpf_u_int32 oldmode; /* mode to restore when turning monitor mode off */
char *mondevice; /* mac80211 monitor device we created */
u_char *mmapbuf; /* pointer to the memory-mapped region */
size_t mmapbuflen; /* size of region */
int vlan_offset; /* offset at which to insert vlan tags; if -1, don't insert */
u_int tp_version; /* version of tpacket_hdr for mmaped ring */
u_int tp_hdrlen; /* hdrlen of tpacket_hdr for mmaped ring */
u_char *oneshot_buffer; /* buffer for copy of packet */
int poll_timeout; /* timeout to use in poll() */
#ifdef HAVE_TPACKET3
unsigned char *current_packet; /* Current packet within the TPACKET_V3 block. Move to next block if NULL. */
int packets_left; /* Unhandled packets left within the block from previous call to pcap_read_linux_mmap_v3 in case of TPACKET_V3. */
#endif
int poll_breakloop_fd; /* fd to an eventfd to break from blocking operations */
};
2、打开网卡动作
status = pcap_activate(pc); 会调用activate_op函数指针。这是个随平台变化的函数,
在初始化的时候被指定,这里为 handle->activate_op = pcap_activate_linux;
在pcap_activate_linux中,通过 ret = setup_socket(handle, is_any_device); 新建PF_PACKET套接字。
/*
 * Get a handle for capturing network packets from the given device.
 *
 * Excerpt: lines consisting of "......" mark code the article left out, so
 * the return statements and error handling are not visible here. The two
 * visible steps are creating the capture socket with setup_socket() and
 * then setting up memory-mapped access with setup_mmapped().
 */
static int
pcap_activate_linux(pcap_t *handle)
{
/* Linux-specific private data attached to the generic handle. */
struct pcap_linux *handlep = handle->priv;
const char *device;
int is_any_device;
struct ifreq ifr;
int status = 0;
int status2 = 0;
int ret;
......
/* Create the capture socket (see setup_socket()). */
ret = setup_socket(handle, is_any_device);
......
// Set up memory-mapped access
ret = setup_mmapped(handle, &status);
}
/*
 * Create the PF_PACKET capture socket and bind it to the device.
 *
 * Excerpt: lines consisting of "......" mark code the article left out.
 *
 * handle:        capture handle; its Linux-private data supplies `ifindex`.
 * is_any_device: selects the socket type (SOCK_DGRAM when nonzero,
 *                SOCK_RAW otherwise).
 *
 * In the visible part, a nonzero result from iface_bind() closes the socket
 * and is returned to the caller unchanged.
 */
static int
setup_socket(pcap_t *handle, int is_any_device)
{
struct pcap_linux *handlep = handle->priv;
const char *device = handle->opt.device;
int status = 0;
int sock_fd, arptype;
int val;
int err = 0;
struct packet_mreq mr;
......
// In the article's example a specific interface is named (-i lo), so SOCK_RAW is used
sock_fd = is_any_device ?
socket(PF_PACKET, SOCK_DGRAM, 0) :
socket(PF_PACKET, SOCK_RAW, 0);
......
// Bind the socket associated with the fd to the given device
if ((err = iface_bind(sock_fd, handlep->ifindex,
handle->errbuf, 0)) != 0) {
/* Binding failed: release the socket before reporting the error. */
close(sock_fd);
return err;
}
......
}
这里需要提一下setup_mmapped(handle, &status)这个函数调用,它负责设置内存映射。其中最重要的一步就是通过setsockopt(handle->fd, SOL_PACKET, PACKET_RX_RING,(void *) &req, sizeof(req))设置socket参数,请求内核分配环形接收缓冲区所需的物理内存。
handlep->mmapbuf = mmap(0, handlep->mmapbuflen, PROT_READ | PROT_WRITE, flags, handle->fd, 0);这样就可以将内核分配的物理内存映射到用户空间的地址,即handlep->mmapbuf。
也就是说,先在内核空间中分配一块内核缓冲区,然后用户空间程序调用mmap把它映射到用户空间。内核将接收到的skb拷贝到那块内核缓冲区中,这样用户空间的程序就可以直接读到捕获的数据包了。
/*
 * Set up memory-mapped capture for the handle.
 *
 * Excerpt: lines consisting of "......" mark code the article left out.
 *
 * handle: capture handle whose ring is created.
 * status: passed through to create_ring().
 *
 * Visible return values: -1 when create_ring() returns -1 (after releasing
 * the one-shot buffer), 1 otherwise.
 */
static int
setup_mmapped(pcap_t *handle, int *status)
{
struct pcap_linux *handlep = handle->priv;
/* NOTE(review): flags is presumably used by the elided code that maps oneshot_buffer — confirm. */
int ret, flags = MAP_ANONYMOUS | MAP_PRIVATE;
......
ret = create_ring(handle, status);
if (ret == -1) {
/* Ring creation failed: unmap the one-shot buffer (snapshot bytes) and clear the pointer. */
munmap(handlep->oneshot_buffer, handle->snapshot);
handlep->oneshot_buffer = NULL;
return -1;
}
set_poll_timeout(handlep);
return 1;
}
/*
 * Excerpt from libpcap: build the packet ring for a capture handle.
 * Lines made only of dots mark code the article left out (including the
 * declaration and setup of `req` and `frames_per_block`, the mmap() call
 * and the return statements).
 *
 * handle: capture handle; its Linux-private data holds `mmapbuf`.
 * status: out-parameter for the caller (not touched in the visible part).
 *
 * The visible part walks every frame of every block of the mapped region
 * and records each frame's address through RING_GET_CURRENT_FRAME().
 */
static int
create_ring(pcap_t *handle, int *status)
{
	struct pcap_linux *handlep = handle->priv;
	unsigned i, j, frames_per_block;
	/* NOTE(review): presumably passed to the elided mmap() call — confirm. */
	int flags = MAP_SHARED;
	......
	handle->offset = 0;
	/*
	 * Point the generic pcap handle's per-frame slots at the frames
	 * inside pcap_linux's mmapbuf. handle->offset is advanced once per
	 * frame, so each slot gets its own frame address.
	 */
	for (i=0; i<req.tp_block_nr; ++i) {
		/* Start of block i within the mapped region. */
		u_char *base = &handlep->mmapbuf[i*req.tp_block_size];
		for (j=0; j<frames_per_block; ++j, ++handle->offset) {
			RING_GET_CURRENT_FRAME(handle) = base;
			base += req.tp_frame_size;
		}
	}
	.......
}
到这里我们知道,所谓的“打开网卡”的动作,本质上是建立一个PF_PACKET的socket,并绑定到要监听的网卡设备上,同时和内核空间的网络模块建立内存映射。那接下来的包读取就不难猜测,从映射的内存中不断获取包。
网络包的读取
在main函数中会调用pcap_loop,其中有个循环在不停读取数据包。读取函数p->read_op在初始化的时候被设置为pcap_read_linux_mmap_v3:
/*
 * Read packets from `p` and deliver them to `callback` until `cnt` packets
 * have been handled, the source reports end-of-file, or a read fails.
 *
 * p:        capture handle; reads from a savefile when p->rfile is set,
 *           otherwise through p->read_op.
 * cnt:      number of packets to process; values for which
 *           PACKET_COUNT_IS_UNLIMITED(cnt) holds mean "no limit".
 * callback: invoked by the read routine for each packet.
 * user:     opaque pointer passed through to the read routine.
 *
 * Returns 0 once `cnt` packets have been processed, or the (zero or
 * negative) value returned by the read routine that stopped the loop.
 */
int
pcap_loop(pcap_t *p, int cnt, pcap_handler callback, u_char *user)
{
	int nread;

	while (1) {
		if (p->rfile == NULL) {
			/*
			 * Live capture: a return of 0 just means nothing was
			 * delivered this time, so retry until packets arrive
			 * or an error is reported.
			 */
			for (;;) {
				nread = p->read_op(p, cnt, callback, user);
				if (nread != 0)
					break;
			}
		} else {
			/* Savefile: 0 means EOF here, so it is not retried. */
			nread = pcap_offline_read(p, cnt, callback, user);
		}

		/* EOF (0) or error (negative) ends the loop. */
		if (nread <= 0)
			return (nread);

		/* With no packet limit there is nothing to count down. */
		if (PACKET_COUNT_IS_UNLIMITED(cnt))
			continue;

		cnt -= nread;
		if (cnt <= 0)
			return (0);
	}
}
/*
 * Excerpt from libpcap: read routine for the TPACKET_V3 memory-mapped ring.
 * Lines consisting of "......" mark code the article left out (including
 * the declaration of `packets_to_read`, block handling and the final
 * return).
 *
 * handle:      capture handle; its Linux-private data tracks the current
 *              packet and how many packets are left.
 * max_packets: packet limit for this call (used only in elided code).
 * callback:    per-packet handler passed on to pcap_handle_packet_mmap().
 * user:        opaque pointer passed on with the callback.
 *
 * In the visible part, a negative result from pcap_handle_packet_mmap()
 * is returned immediately after clearing current_packet.
 */
static int
pcap_read_linux_mmap_v3(pcap_t *handle, int max_packets, pcap_handler callback,
		u_char *user)
{
	struct pcap_linux *handlep = handle->priv;
	union thdr h;
	int pkts = 0;
	int ret;
	......
	/* Read the packets in the memory-mapped area and hand each one to the callback. */
	while (packets_to_read-- && !handle->break_loop) {
		/* The packet header sits at the current position in the mapped region. */
		struct tpacket3_hdr* tp3_hdr = (struct tpacket3_hdr*) handlep->current_packet;
		ret = pcap_handle_packet_mmap(
				handle,
				callback,
				user,
				handlep->current_packet,
				tp3_hdr->tp_len,
				tp3_hdr->tp_mac,
				tp3_hdr->tp_snaplen,
				tp3_hdr->tp_sec,
				/* Pass nanoseconds as-is, or divide by 1000 when nanosecond precision was not requested. */
				handle->opt.tstamp_precision == PCAP_TSTAMP_PRECISION_NANO ? tp3_hdr->tp_nsec : tp3_hdr->tp_nsec / 1000,
				VLAN_VALID(tp3_hdr, &tp3_hdr->hv1),
				tp3_hdr->hv1.tp_vlan_tci,
				VLAN_TPID(tp3_hdr, &tp3_hdr->hv1));
		if (ret == 1) {
			/* Only a result of 1 counts towards pkts. */
			pkts++;
		} else if (ret < 0) {
			/* Error: drop the position in the current block and report it. */
			handlep->current_packet = NULL;
			return ret;
		}
		/* Advance to the next packet using the offset stored in the header. */
		handlep->current_packet += tp3_hdr->tp_next_offset;
		handlep->packets_left--;
	}
	......
}
在处理过程中会调用callback函数。该回调在main中传入,即print_packet,它对传入的pcap_pkthdr及包数据进行解析并打印到控制台。
至此,用户态的动作解说完毕。