上篇文章讲了NFQUEUE的使用方法以及用户态程序一侧的情况,本篇主要分析nf_queue在内核态的处理流程。
内核eBPF追踪nf_queue
sudo bpftrace -e 'k:nf_queue {@[kstack] = count(); }'
查看nf_queue的内核函数栈(下文第二段调用栈以nfqnl_recv_verdict为起点,需要把探测点换成 k:nfqnl_recv_verdict 才能采集到)
内核中功能逻辑分为两部分:
- 接收指定iptables符合规则的包,并通过 nfnetlink 将网络包发给用户程序
@[
nf_queue+1
nf_hook_slow+122
ip_local_deliver+195
ip_rcv+389
__netif_receive_skb_core.constprop.0+1547
__netif_receive_skb_list_core+314
netif_receive_skb_list_internal+490
napi_complete_done+109
iwl_pcie_napi_poll_msix+162
__napi_poll+75
net_rx_action+641
__softirqentry_text_start+205
do_softirq+196
__local_bh_enable_ip+108
iwl_pcie_irq_rx_msix_handler+189
irq_thread_fn+28
irq_thread+233
kthread+288
ret_from_fork+31
]
- 接收用户程序通过 nfnetlink 发送过来的判决结果,并进行处理
@[
nfqnl_recv_verdict+1
nfnetlink_rcv_msg+484
netlink_rcv_skb+78
netlink_unicast+580
netlink_sendmsg+594
sock_sendmsg+98
__sys_sendto+275
__x64_sys_sendto+32
do_syscall_64+88
entry_SYSCALL_64_after_hwframe+97
]
nf_queue发送逻辑
nf_hook_slow 中 switch 的各个分支对应规则里 -j 指定的动作:-j NFQUEUE 对应 NF_QUEUE 分支,该分支接下来会调用 nf_queue()。
int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state,
const struct nf_hook_entries *e, unsigned int s)
{
……
for (; s < e->num_hook_entries; s++) {
verdict = nf_hook_entry_hookfn(&e->hooks[s], skb, state);
switch (verdict & NF_VERDICT_MASK) {
case NF_ACCEPT:
break;
case NF_DROP:
……
case NF_QUEUE:
ret = nf_queue(skb, state, s, verdict);
if (ret == 1)
continue;
return ret;
default:
return 0;
}
}
……
}
nf_queue函数的主要逻辑在于__nf_queue
static int __nf_queue(struct sk_buff *skb, const struct nf_hook_state *state,
unsigned int index, unsigned int queuenum){
……
struct nf_queue_entry *entry = NULL;
//创建一个nf_queue的成员
entry = kmalloc(sizeof(*entry) + route_key_size, GFP_ATOMIC);
*entry = (struct nf_queue_entry) {
//引用网络包
.skb = skb,
.state = *state,
.hook_index = index,
.size = sizeof(*entry) + route_key_size,
};
……
qh = rcu_dereference(nf_queue_handler);
status = qh->outfn(entry, queuenum);
……
}
这里的主要逻辑在于nf_queue_handler的outfn函数,这个函数在初始化的时候就已经注册好了(即下面的nfqnl_enqueue_packet):
static const struct nf_queue_handler nfqh = {
.outfn = nfqnl_enqueue_packet,
.nf_hook_drop = nfqnl_nf_hook_drop,
};
……
nf_register_queue_handler(&nfqh);
……
/*
* Hook for nfnetlink_queue to register its queue handler.
* We do this so that most of the NFQUEUE code can be modular.
*
* Once the queue is registered it must reinject all packets it
* receives, no matter what.
*/
void nf_register_queue_handler(const struct nf_queue_handler *qh)
{
/* should never happen, we only have one queueing backend in kernel */
WARN_ON(rcu_access_pointer(nf_queue_handler));
rcu_assign_pointer(nf_queue_handler, qh);
}
数据包进入nfqnl_enqueue_packet函数
static int
nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
{
……
//寻找到queuenum所对应的队列
/* rcu_read_lock()ed by nf_hook_thresh */
queue = instance_lookup(q, queuenum);
……
if ((queue->flags & NFQA_CFG_F_GSO) || !skb_is_gso(skb))
return __nfqnl_enqueue_packet(net, queue, entry);
}
static int
__nfqnl_enqueue_packet(struct net *net, struct nfqnl_instance *queue,
struct nf_queue_entry *entry)
{
//根据成员队列等构造消息包
nskb = nfqnl_build_packet_message(net, queue, entry, &packet_id_ptr);
……
/* nfnetlink_unicast will either free the nskb or add it to a socket */
//通过nfnetlink发送数据包到指定程序
err = nfnetlink_unicast(nskb, net, queue->peer_portid);
……
__enqueue_entry(queue, entry);
}
static inline void
__enqueue_entry(struct nfqnl_instance *queue, struct nf_queue_entry *entry)
{
list_add_tail(&entry->list, &queue->queue_list);
queue->queue_total++;
}
数据包通过nfnetlink发送给用户程序之后,对应的entry会被加入队列之中。这个步骤很重要:因为NF_QUEUE的决策权在用户程序那里,所以内核需要在这里先把包入队保存,等待用户程序返回判决结果后再处理。
接收判决结果,进行处理
在nfqueue初始化的时候,也注册了接收判决结果的回调函数nfqnl_recv_verdict:
static const struct nfnl_callback nfqnl_cb[NFQNL_MSG_MAX] = {
……
[NFQNL_MSG_VERDICT] = {
.call = nfqnl_recv_verdict,
.type = NFNL_CB_RCU,
.attr_count = NFQA_MAX,
.policy = nfqa_verdict_policy
},
……
};
static int nfqnl_recv_verdict(struct sk_buff *skb, const struct nfnl_info *info,
const struct nlattr * const nfqa[])
{
//获取队列和包所对应成员
queue = verdict_instance_lookup(q, queue_num,NETLINK_CB(skb).portid);
vhdr = verdicthdr_get(nfqa);
verdict = ntohl(vhdr->verdict);
entry = find_dequeue_entry(queue, ntohl(vhdr->id));
……
nfqnl_reinject(entry, verdict);
}
//从队列中取出对应的包并直接出队
static void
__dequeue_entry(struct nfqnl_instance *queue, struct nf_queue_entry *entry)
{
list_del(&entry->list);
queue->queue_total--;
}
由函数nfqnl_reinject和nf_reinject根据收到的决策结果作出处理。
static void nfqnl_reinject(struct nf_queue_entry *entry, unsigned int verdict)
{
struct nf_ct_hook *ct_hook;
int err;
if (verdict == NF_ACCEPT ||
verdict == NF_REPEAT ||
verdict == NF_STOP) {
rcu_read_lock();
ct_hook = rcu_dereference(nf_ct_hook);
if (ct_hook) {
err = ct_hook->update(entry->state.net, entry->skb);
if (err < 0)
verdict = NF_DROP;
}
rcu_read_unlock();
}
nf_reinject(entry, verdict);
}
void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
{
……
switch (verdict & NF_VERDICT_MASK) {
case NF_ACCEPT:
case NF_STOP:
local_bh_disable();
entry->state.okfn(entry->state.net, entry->state.sk, skb);
local_bh_enable();
break;
case NF_QUEUE:
err = nf_queue(skb, &entry->state, i, verdict);
if (err == 1)
goto next_hook;
break;
case NF_STOLEN:
break;
default:
kfree_skb(skb);
}
nf_queue_entry_free(entry);
}
nf_reinject根据判决结果做出相应动作(NF_ACCEPT/NF_STOP调用okfn继续处理,NF_QUEUE再次入队,其余情况释放skb丢弃),最后释放entry成员。 至此,整个NFQUEUE机制浅析完毕。