浅析iptables NFQUEUE机制——原理篇

1,019 阅读3分钟

上篇文章介绍了NFQUEUE的使用方法以及用户态程序的处理情况,本篇主要分析nf_queue在内核态中的处理流程。

内核eBPF追踪nf_queue

sudo bpftrace -e 'k:nf_queue {@[kstack] = count(); }'

查看nf_queue的内核函数栈

内核中功能逻辑分为两部分:

  • 接收指定iptables符合规则的包,并通过 nfnetlink 将网络包发给用户程序
@[  
   nf_queue+1  
   nf_hook_slow+122  
   ip_local_deliver+195  
   ip_rcv+389  
   __netif_receive_skb_core.constprop.0+1547  
   __netif_receive_skb_list_core+314  
   netif_receive_skb_list_internal+490  
   napi_complete_done+109  
   iwl_pcie_napi_poll_msix+162  
   __napi_poll+75  
   net_rx_action+641  
   __softirqentry_text_start+205  
   do_softirq+196  
   __local_bh_enable_ip+108  
   iwl_pcie_irq_rx_msix_handler+189  
   irq_thread_fn+28  
   irq_thread+233  
   kthread+288  
   ret_from_fork+31  
]
  • 接收用户程序通过 nfnetlink 发送过来的判决结果,并进行处理
@[  
   nfqnl_recv_verdict+1  
   nfnetlink_rcv_msg+484  
   netlink_rcv_skb+78  
   netlink_unicast+580  
   netlink_sendmsg+594  
   sock_sendmsg+98  
   __sys_sendto+275  
   __x64_sys_sendto+32  
   do_syscall_64+88  
   entry_SYSCALL_64_after_hwframe+97  
]

nf_queue发送逻辑

nf_hook_slow中switch的各个分支对应iptables规则里-j指定的动作,-j NFQUEUE对应的就是NF_QUEUE分支,该分支会调用nf_queue()。

/*
 * Excerpt (parts elided with "……"): iterate over the hook entries in @e,
 * starting at index @s, and act on the verdict each hook returns for @skb.
 */
int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state,
		 const struct nf_hook_entries *e, unsigned int s)
{
	……
	for (; s < e->num_hook_entries; s++) {
		verdict = nf_hook_entry_hookfn(&e->hooks[s], skb, state);
		/* Only the bits selected by NF_VERDICT_MASK choose the branch. */
		switch (verdict & NF_VERDICT_MASK) {
		case NF_ACCEPT:
			/* Fall out of the switch and try the next hook entry. */
			break;
		case NF_DROP:
			……
		case NF_QUEUE:
			/*
			 * Hand the packet to nf_queue(). A return value of 1
			 * moves on to the next hook entry; any other value is
			 * returned to the caller as-is.
			 */
			ret = nf_queue(skb, state, s, verdict);
			if (ret == 1)
				continue;
			return ret;
		default:
			/* Any other verdict ends the traversal with 0. */
			return 0;
		}
	}
        ……
}

nf_queue函数的主要逻辑在于__nf_queue

/*
 * Excerpt (parts elided with "……"): wrap @skb and a copy of @state in a
 * newly allocated nf_queue_entry and pass it to the registered queue
 * handler's outfn together with @queuenum.
 */
static int __nf_queue(struct sk_buff *skb, const struct nf_hook_state *state,
		      unsigned int index, unsigned int queuenum){
……
struct nf_queue_entry *entry = NULL;
// Allocate a new nf_queue entry (plus route_key_size extra bytes).
// NOTE(review): no NULL check on the kmalloc() result is visible in this
// excerpt; presumably it was elided -- confirm against the kernel source.
entry = kmalloc(sizeof(*entry) + route_key_size, GFP_ATOMIC);
*entry = (struct nf_queue_entry) {
         // Reference the network packet
        .skb	= skb,
        .state	= *state,
        .hook_index = index,
        .size	= sizeof(*entry) + route_key_size,
};
……
/* Fetch the currently registered queue handler and let it take the entry. */
qh = rcu_dereference(nf_queue_handler);
status = qh->outfn(entry, queuenum);
……
}

这里的主要逻辑在于nf_queue_handler的outfn函数,这个函数在初始化的时候就已经注册好了。

/*
 * Queue handler callbacks: outfn is nfqnl_enqueue_packet and nf_hook_drop is
 * nfqnl_nf_hook_drop.
 */
static const struct nf_queue_handler nfqh = {
	.outfn		= nfqnl_enqueue_packet,
	.nf_hook_drop	= nfqnl_nf_hook_drop,
};
……
/* Install the table above as the queue handler (surrounding code elided). */
nf_register_queue_handler(&nfqh);
……
/*
 * Hook for nfnetlink_queue to register its queue handler.
 * We do this so that most of the NFQUEUE code can be modular.
 *
 * Once the queue is registered it must reinject all packets it
 * receives, no matter what.
 *
 * @qh: handler to publish through the nf_queue_handler pointer.
 */

void nf_register_queue_handler(const struct nf_queue_handler *qh)
{
	/* should never happen, we only have one queueing backend in kernel */
	WARN_ON(rcu_access_pointer(nf_queue_handler));
	/*
	 * The assignment below is unconditional: an already-registered
	 * handler is replaced by @qh after the check above.
	 */
	rcu_assign_pointer(nf_queue_handler, qh);
}

数据包进入nfqnl_enqueue_packet函数

/*
 * Excerpt (parts elided with "……"): the outfn callback. Looks up the queue
 * instance for @queuenum and forwards @entry to __nfqnl_enqueue_packet().
 */
static int
nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
{
……
// Look up the queue instance that corresponds to queuenum
/* rcu_read_lock()ed by nf_hook_thresh */
        queue = instance_lookup(q, queuenum);
        ……
        
    /*
     * Enqueue the entry as-is when the queue has NFQA_CFG_F_GSO set or the
     * skb is not a GSO packet. The remaining case is not shown here.
     */
    if ((queue->flags & NFQA_CFG_F_GSO) || !skb_is_gso(skb))
		return __nfqnl_enqueue_packet(net, queue, entry);
}


/*
 * Excerpt (parts elided with "……"): build a netlink message for @entry, send
 * it to the queue's peer, then add @entry to the queue's list.
 */
static int
__nfqnl_enqueue_packet(struct net *net, struct nfqnl_instance *queue,
			struct nf_queue_entry *entry)
{
// Build the message from the queue and the entry
    nskb = nfqnl_build_packet_message(net, queue, entry, &packet_id_ptr);
    ……
/* nfnetlink_unicast will either free the nskb or add it to a socket */
// Send the message over nfnetlink to the peer identified by queue->peer_portid
	err = nfnetlink_unicast(nskb, net, queue->peer_portid);
 ……
/* Park the entry on the queue; it is looked up again when a verdict arrives. */
__enqueue_entry(queue, entry);
}

/*
 * Append @entry to the tail of @queue's queue_list and increment the queue's
 * queue_total counter.
 */
static inline void
__enqueue_entry(struct nfqnl_instance *queue, struct nf_queue_entry *entry)
{
       list_add_tail(&entry->list, &queue->queue_list);
       queue->queue_total++;
}

消息发送给用户程序之后,对应的entry会被加入队列。这个步骤很重要:NF_QUEUE的判决权在用户态程序手中,所以内核需要先让数据包入队,等待用户程序返回判决结果后再继续处理。

接收判决结果,进行处理

在nfqueue初始化的时候也注册了接收函数

/*
 * nfnetlink callback table, indexed by message type (other entries elided
 * with "……"). NFQNL_MSG_VERDICT messages are handled by nfqnl_recv_verdict.
 */
static const struct nfnl_callback nfqnl_cb[NFQNL_MSG_MAX] = {
	……
	[NFQNL_MSG_VERDICT]	= {
		.call		= nfqnl_recv_verdict,
		.type		= NFNL_CB_RCU,
		.attr_count	= NFQA_MAX,
		.policy		= nfqa_verdict_policy
	},
	……
};
/*
 * Excerpt (parts elided with "……"): handle a verdict message. Finds the queue
 * and the pending entry the verdict refers to, then reinjects that entry with
 * the verdict.
 * NOTE(review): no error checks on the lookups are visible in this excerpt;
 * presumably they were left out of the quote -- confirm against the kernel
 * source.
 */
static int nfqnl_recv_verdict(struct sk_buff *skb, const struct nfnl_info *info,
			      const struct nlattr * const nfqa[])
{
// Find the queue and the entry that corresponds to the packet
    queue = verdict_instance_lookup(q, queue_num,NETLINK_CB(skb).portid);
    vhdr = verdicthdr_get(nfqa);
    /* The verdict and id fields are converted with ntohl() before use. */
    verdict = ntohl(vhdr->verdict);
    entry = find_dequeue_entry(queue, ntohl(vhdr->id));
    ……
    nfqnl_reinject(entry, verdict);
}

// Remove @entry from the queue's list and decrement the queue's queue_total
// counter.
static void
__dequeue_entry(struct nfqnl_instance *queue, struct nf_queue_entry *entry)
{
	list_del(&entry->list);
	queue->queue_total--;
}

由函数nfqnl_reinject和nf_reinject根据收到的决策结果作出处理。

/*
 * Reinject @entry with @verdict. For NF_ACCEPT, NF_REPEAT and NF_STOP the
 * registered nf_ct_hook (if any) gets to update the packet first.
 */
static void nfqnl_reinject(struct nf_queue_entry *entry, unsigned int verdict)
{
	struct nf_ct_hook *ct_hook;
	int err;

	if (verdict == NF_ACCEPT ||
	    verdict == NF_REPEAT ||
	    verdict == NF_STOP) {
		/* nf_ct_hook is read under the RCU read lock. */
		rcu_read_lock();
		ct_hook = rcu_dereference(nf_ct_hook);
		if (ct_hook) {
			err = ct_hook->update(entry->state.net, entry->skb);
			/* A negative result from update() turns the verdict into NF_DROP. */
			if (err < 0)
				verdict = NF_DROP;
		}
		rcu_read_unlock();
	}
	nf_reinject(entry, verdict);
}

/*
 * Excerpt (parts elided with "……"): act on @verdict for the packet held by
 * @entry, then free @entry. skb, i, err and the next_hook label are set up in
 * the elided part.
 */
void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
{
……
	switch (verdict & NF_VERDICT_MASK) {
	case NF_ACCEPT:
	case NF_STOP:
		/* Continue normal processing via okfn, with bottom halves disabled. */
		local_bh_disable();
		entry->state.okfn(entry->state.net, entry->state.sk, skb);
		local_bh_enable();
		break;
	case NF_QUEUE:
		/* Queue the packet again; a return of 1 jumps to next_hook. */
		err = nf_queue(skb, &entry->state, i, verdict);
		if (err == 1)
			goto next_hook;
		break;
	case NF_STOLEN:
		/* The skb is neither passed on nor freed here. */
		break;
	default:
		/* Every other verdict frees the skb. */
		kfree_skb(skb);
	}

	/* The queue entry itself is released on every path that reaches here. */
	nf_queue_entry_free(entry);
}

根据决策作出相应动作,并且释放成员。 至此,整个NFQUEUE机制浅析完毕。