calico pod -> svc packet drops


Problem symptoms



# inside the pod
root@node1021:~# k exec -it -n las-daemon-create-image          image-68f0e27d94890244f60e53d2-rvlvn -- bash
root@image-68f0e27d94890244f60e53d2-rvlvn:~# ifconfig
eth0: flags=4163<UP,BROADCAST,RUNNING,MULTICAST>  mtu 1480
        inet 10.199.103.181  netmask 255.255.255.255  broadcast 0.0.0.0
        inet6 fe80::687f:33ff:feb5:220c  prefixlen 64  scopeid 0x20<link>
        ether 6a:7f:33:b5:22:0c  txqueuelen 0  (Ethernet)
        RX packets 3  bytes 444 (444.0 B)
        RX errors 0  dropped 2  overruns 0  frame 0
        TX packets 19  bytes 1434 (1.4 KB)
        TX errors 0  dropped 1 overruns 0  carrier 0  collisions 0

lo: flags=73<UP,LOOPBACK,RUNNING>  mtu 65536
        inet 127.0.0.1  netmask 255.0.0.0
        inet6 ::1  prefixlen 128  scopeid 0x10<host>
        loop  txqueuelen 1000  (Local Loopback)
        RX packets 0  bytes 0 (0.0 B)
        RX errors 0  dropped 0  overruns 0  frame 0
        TX packets 0  bytes 0 (0.0 B)
        TX errors 0  dropped 0 overruns 0  carrier 0  collisions 0

# the svc is unreachable from inside the pod
root@image-68f0e27d94890244f60e53d2-rvlvn:~# curl -k https://10.233.0.1:443/healthz

command terminated with exit code 137
root@node1021:~#

Tracing where the packet is dropped


root@node1022:~# nettrace -p tcp --addr 10.199.103.181
begin trace...
***************** 3baec600,3baec6e0,3baec6e0,3baec6e0,3baec6e0,3baec6e0 ***************
[9868408.922840] [__tcp_transmit_skb  ] TCP: 10.199.103.181:50940 -> 10.233.0.1:443 seq:2763303023, ack:0, flags:S
[9868408.922853] [skb_clone           ] unknow
[9868408.922859] [__ip_queue_xmit     ] TCP: 10.199.103.181:50940 -> 10.233.0.1:443 seq:2763303023, ack:0, flags:S
[9868408.922862] [__ip_local_out      ] TCP: 10.199.103.181:50940 -> 10.233.0.1:443 seq:2763303023, ack:0, flags:S
[9868408.922865] [ip_output           ] TCP: 10.199.103.181:50940 -> 10.233.0.1:443 seq:2763303023, ack:0, flags:S
[9868408.922868] [nf_hook_slow        ] TCP: 10.199.103.181:50940 -> 10.233.0.1:443 seq:2763303023, ack:0, flags:S *ipv4 in chain: POST_ROUTING*
[9868408.922871] [ip_finish_output    ] TCP: 10.199.103.181:50940 -> 10.233.0.1:443 seq:2763303023, ack:0, flags:S
[9868408.922874] [ip_finish_output2   ] TCP: 10.199.103.181:50940 -> 10.233.0.1:443 seq:2763303023, ack:0, flags:S
[9868408.922877] [__dev_queue_xmit    ] TCP: 10.199.103.181:50940 -> 10.233.0.1:443 seq:2763303023, ack:0, flags:S
[9868408.922880] [dev_hard_start_xmit ] TCP: 10.199.103.181:50940 -> 10.233.0.1:443 seq:2763303023, ack:0, flags:S *skb is successfully sent to the NIC driver*
[9868408.922884] [enqueue_to_backlog  ] TCP: 10.199.103.181:50940 -> 10.233.0.1:443 seq:2763303023, ack:0, flags:S
[9868408.922888] [__netif_receive_skb_core] TCP: 10.199.103.181:50940 -> 10.233.0.1:443 seq:2763303023, ack:0, flags:S
[9868408.922891] [packet_rcv          ] TCP: 10.199.103.181:50940 -> 10.233.0.1:443 seq:2763303023, ack:0, flags:S
[9868408.922894] [ip_rcv              ] TCP: 10.199.103.181:50940 -> 10.233.0.1:443 seq:2763303023, ack:0, flags:S
[9868408.922897] [ip_rcv_core         ] TCP: 10.199.103.181:50940 -> 10.233.0.1:443 seq:2763303023, ack:0, flags:S
[9868408.922899] [nf_hook_slow        ] TCP: 10.199.103.181:50940 -> 10.233.0.1:443 seq:2763303023, ack:0, flags:S *ipv4 in chain: PRE_ROUTING*
[9868408.922902] [ipt_do_table        ] TCP: 10.199.103.181:50940 -> 10.233.0.1:443 seq:2763303023, ack:0, flags:S *iptables table:raw, chain:PRE_ROUTING*
[9868408.922911] [nft_do_chain        ] TCP: 10.199.103.181:50940 -> 10.233.0.1:443 seq:2763303023, ack:0, flags:S *iptables table:raw, chain:PREROUT*
[9868408.922921] [nft_do_chain        ] TCP: 10.199.103.181:50940 -> 10.233.0.1:443 seq:2763303023, ack:0, flags:S *iptables table:mangle, chain:PREROUT*
[9868408.922926] [ipt_do_table        ] TCP: 10.199.103.181:50940 -> 10.233.0.1:443 seq:2763303023, ack:0, flags:S *iptables table:mangle, chain:PRE_ROUTING*
[9868408.922928] [ipt_do_table        ] TCP: 10.199.103.181:50940 -> 10.233.0.1:443 seq:2763303023, ack:0, flags:S *iptables table:nat, chain:PRE_ROUTING*
[9868408.922931] [nft_do_chain        ] TCP: 10.199.103.181:50940 -> 10.233.0.1:443 seq:2763303023, ack:0, flags:S *iptables table:nat, chain:PREROUT*
[9868408.922939] [ip_route_input_slow ] TCP: 10.199.103.181:50940 -> 10.233.0.1:443 seq:2763303023, ack:0, flags:S
[9868408.922942] [fib_validate_source ] TCP: 10.199.103.181:50940 -> 10.233.0.1:443 seq:2763303023, ack:0, flags:S
[9868408.922946] [ip_local_deliver    ] TCP: 10.199.103.181:50940 -> 10.233.0.1:443 seq:2763303023, ack:0, flags:S
[9868408.922947] [nf_hook_slow        ] TCP: 10.199.103.181:50940 -> 10.233.0.1:443 seq:2763303023, ack:0, flags:S *ipv4 in chain: INPUT*
[9868408.922949] [ipt_do_table        ] TCP: 10.199.103.181:50940 -> 10.233.0.1:443 seq:2763303023, ack:0, flags:S *iptables table:mangle, chain:INPUT*
[9868408.922951] [ipt_do_table        ] TCP: 10.199.103.181:50940 -> 10.233.0.1:443 seq:2763303023, ack:0, flags:S *iptables table:filter, chain:INPUT*
[9868408.922956] [nft_do_chain        ] TCP: 10.199.103.181:50940 -> 10.233.0.1:443 seq:2763303023, ack:0, flags:S *iptables table:filter, chain:INPUT*
[9868408.922968] [kfree_skb           ] TCP: 10.199.103.181:50940 -> 10.233.0.1:443 seq:2763303023, ack:0, flags:S *nf_hook_slow+0x9b* *packet is dropped by kernel*


[9868409.930984] [__tcp_retransmit_skb] TCP: 10.199.103.181:50940 -> 10.233.0.1:443 seq:2763303023, ack:0, flags:S
[9868409.930995] [__tcp_transmit_skb  ] TCP: 10.199.103.181:50940 -> 10.233.0.1:443 seq:2763303023, ack:0, flags:S
[9868409.930998] [skb_clone           ] unknow
[9868409.931006] [__ip_queue_xmit     ] TCP: 10.199.103.181:50940 -> 10.233.0.1:443 seq:2763303023, ack:0, flags:S
[9868409.931011] [__ip_local_out      ] TCP: 10.199.103.181:50940 -> 10.233.0.1:443 seq:2763303023, ack:0, flags:S
[9868409.931014] [ip_output           ] TCP: 10.199.103.181:50940 -> 10.233.0.1:443 seq:2763303023, ack:0, flags:S
[9868409.931018] [nf_hook_slow        ] TCP: 10.199.103.181:50940 -> 10.233.0.1:443 seq:2763303023, ack:0, flags:S *ipv4 in chain: POST_ROUTING*
[9868409.931020] [ip_finish_output    ] TCP: 10.199.103.181:50940 -> 10.233.0.1:443 seq:2763303023, ack:0, flags:S
[9868409.931023] [ip_finish_output2   ] TCP: 10.199.103.181:50940 -> 10.233.0.1:443 seq:2763303023, ack:0, flags:S
[9868409.931027] [__dev_queue_xmit    ] TCP: 10.199.103.181:50940 -> 10.233.0.1:443 seq:2763303023, ack:0, flags:S
[9868409.931030] [dev_hard_start_xmit ] TCP: 10.199.103.181:50940 -> 10.233.0.1:443 seq:2763303023, ack:0, flags:S *skb is successfully sent to the NIC driver*
[9868409.931034] [enqueue_to_backlog  ] TCP: 10.199.103.181:50940 -> 10.233.0.1:443 seq:2763303023, ack:0, flags:S
[9868409.931044] [__netif_receive_skb_core] TCP: 10.199.103.181:50940 -> 10.233.0.1:443 seq:2763303023, ack:0, flags:S
[9868409.931050] [packet_rcv          ] TCP: 10.199.103.181:50940 -> 10.233.0.1:443 seq:2763303023, ack:0, flags:S
[9868409.931053] [ip_rcv              ] TCP: 10.199.103.181:50940 -> 10.233.0.1:443 seq:2763303023, ack:0, flags:S
[9868409.931058] [ip_rcv_core         ] TCP: 10.199.103.181:50940 -> 10.233.0.1:443 seq:2763303023, ack:0, flags:S
[9868409.931060] [nf_hook_slow        ] TCP: 10.199.103.181:50940 -> 10.233.0.1:443 seq:2763303023, ack:0, flags:S *ipv4 in chain: PRE_ROUTING*
[9868409.931063] [ipt_do_table        ] TCP: 10.199.103.181:50940 -> 10.233.0.1:443 seq:2763303023, ack:0, flags:S *iptables table:raw, chain:PRE_ROUTING*
[9868409.931074] [nft_do_chain        ] TCP: 10.199.103.181:50940 -> 10.233.0.1:443 seq:2763303023, ack:0, flags:S *iptables table:raw, chain:PREROUT*
[9868409.931086] [nft_do_chain        ] TCP: 10.199.103.181:50940 -> 10.233.0.1:443 seq:2763303023, ack:0, flags:S *iptables table:mangle, chain:PREROUT*
[9868409.931095] [ipt_do_table        ] TCP: 10.199.103.181:50940 -> 10.233.0.1:443 seq:2763303023, ack:0, flags:S *iptables table:mangle, chain:PRE_ROUTING*
[9868409.931098] [ipt_do_table        ] TCP: 10.199.103.181:50940 -> 10.233.0.1:443 seq:2763303023, ack:0, flags:S *iptables table:nat, chain:PRE_ROUTING*
[9868409.931101] [nft_do_chain        ] TCP: 10.199.103.181:50940 -> 10.233.0.1:443 seq:2763303023, ack:0, flags:S *iptables table:nat, chain:PREROUT*
[9868409.931109] [ip_route_input_slow ] TCP: 10.199.103.181:50940 -> 10.233.0.1:443 seq:2763303023, ack:0, flags:S
[9868409.931114] [fib_validate_source ] TCP: 10.199.103.181:50940 -> 10.233.0.1:443 seq:2763303023, ack:0, flags:S
[9868409.931118] [ip_local_deliver    ] TCP: 10.199.103.181:50940 -> 10.233.0.1:443 seq:2763303023, ack:0, flags:S
[9868409.931122] [nf_hook_slow        ] TCP: 10.199.103.181:50940 -> 10.233.0.1:443 seq:2763303023, ack:0, flags:S *ipv4 in chain: INPUT*
[9868409.931124] [ipt_do_table        ] TCP: 10.199.103.181:50940 -> 10.233.0.1:443 seq:2763303023, ack:0, flags:S *iptables table:mangle, chain:INPUT*
[9868409.931125] [ipt_do_table        ] TCP: 10.199.103.181:50940 -> 10.233.0.1:443 seq:2763303023, ack:0, flags:S *iptables table:filter, chain:INPUT*
[9868409.931131] [nft_do_chain        ] TCP: 10.199.103.181:50940 -> 10.233.0.1:443 seq:2763303023, ack:0, flags:S *iptables table:filter, chain:INPUT*
[9868409.931145] [kfree_skb           ] TCP: 10.199.103.181:50940 -> 10.233.0.1:443 seq:2763303023, ack:0, flags:S *nf_hook_slow+0x9b* *packet is dropped by kernel*
[9868411.946983] [__tcp_retransmit_skb] TCP: 10.199.103.181:50940 -> 10.233.0.1:443 seq:2763303023, ack:0, flags:S
[9868411.946994] [__tcp_transmit_skb  ] TCP: 10.199.103.181:50940 -> 10.233.0.1:443 seq:2763303023, ack:0, flags:S
[9868411.946996] [skb_clone           ] unknow  





Analysis

iptables and NetworkPolicy investigation

From the logs, the core issue is that the TCP SYN packets sent by the Pod are dropped by the kernel in the node's iptables filter INPUT chain, so the Pod can never establish a connection to 10.233.0.1 (the Kubernetes API Server).

1. Pinpointing the key symptoms

The nettrace log pins down two key facts:

  • The packet is sent successfully: dev_hard_start_xmit is annotated with skb is successfully sent to the NIC driver, so the SYN sent by the Pod does reach the node's network stack.

  • The packet is dropped by the kernel: the trace ends with kfree_skb annotated packet is dropped by kernel, and the drop happens in iptables table:filter, chain:INPUT, i.e. the node's iptables filter rules intercept the packet.

Meanwhile, the curl inside the Pod exits with code 137, which means the process was forcibly terminated (137 = 128 + SIGKILL, e.g. OOM-killed or killed by a signal); that is just the aftermath of the connection timing out and the process being reaped, not the root cause of the drop.
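As a side check, a bounded curl makes the failure mode explicit: with the SYN never answered, curl should fail at the connect stage instead of hanging until the exec session is killed (a sketch; the 5-second timeout is arbitrary):

# run inside the pod; exit code 28 is curl's "operation timed out"
curl -kv --connect-timeout 5 https://10.233.0.1:443/healthz; echo "exit=$?"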

2. Analyzing the likely cause of the drop

Given these symptoms, the drop can only come from the following two classes of rules, ordered from most to least likely:

  1. iptables filter INPUT rules on the node

    1. Kubernetes nodes process traffic through iptables by default; if the filter table's INPUT chain contains a rule rejecting 10.199.103.181:50940 -> 10.233.0.1:443 (e.g. a DROP action), the packet is discarded on the spot.
    2. Check in particular whether custom iptables rules or a security component (e.g. firewalld, ufw) added a blocking policy.
  2. Kubernetes NetworkPolicy rules

    1. If the namespace las-daemon-create-image, or the namespace associated with the target 10.233.0.1 (the API Server), has a NetworkPolicy that forbids the Pod from reaching port 443 of the API Server, the drop is triggered indirectly through iptables.

    2. NetworkPolicy restrictions are rendered into node iptables rules and ultimately take effect in the filter INPUT chain; see the check sketch right after this list.
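A quick way to rule the NetworkPolicy path in or out is to list the policies that could apply to this Pod (a sketch; the calicoctl commands assume the CLI is available on the node):

# Kubernetes-native policies in the Pod's namespace
kubectl get networkpolicy -n las-daemon-create-image
kubectl describe networkpolicy -n las-daemon-create-image
# Calico policies, which are also rendered into the cali-* iptables chains
calicoctl get networkpolicy --all-namespaces
calicoctl get globalnetworkpolicy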

3. Investigation and remediation steps

Step 1: inspect the node's iptables filter INPUT chain rules

On the node where nettrace was run (node1022), run the following to look for reject rules:

# List the full filter INPUT chain in evaluation order (focus on DROP/REJECT rules)
iptables -t filter -L INPUT -nv --line-numbers
# If there are too many rules, filter for entries mentioning 10.199.103.181 or 10.233.0.1
iptables -t filter -L INPUT -nv | grep -E "10.199.103.181|10.233.0.1"
  • If you find a DROP rule matching 10.199.103.181 (the Pod IP) or 10.233.0.1:443, work out where it came from (added manually or generated by a component) and adjust it to match the workload's needs (change it to ACCEPT or remove it). Also check the legacy tables, as sketched below.
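The listings below warn that "iptables-legacy tables present", and the kernel trace hits both ipt_do_table and nft_do_chain, so both iptables backends are active on this node. Rules hiding in the legacy tables will not show up above, so list them too (a sketch, assuming the iptables-legacy binary is installed):

# the legacy backend keeps its own copy of the tables
iptables-legacy -t filter -L INPUT -nv --line-numbers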

The iptables INPUT rules on the affected node


root@node1022:~# iptables -t filter -L INPUT -nv --line-numbers
# Warning: iptables-legacy tables present, use iptables-legacy to see them
Chain INPUT (policy ACCEPT 0 packets, 0 bytes)
num   pkts bytes target     prot opt in     out     source               destination
1    49435 4319K ACCEPT     udp  --  *      *       0.0.0.0/0            169.254.25.10        /* NodeLocal DNS Cache: allow DNS traffic */
2        0     0 ACCEPT     tcp  --  *      *       0.0.0.0/0            169.254.25.10        /* NodeLocal DNS Cache: allow DNS traffic */
3      13G 1218G KUBE-IPVS-FILTER  all  --  *      *       0.0.0.0/0            0.0.0.0/0            /* kubernetes ipvs access filter */
4      13G 1218G KUBE-PROXY-FIREWALL  all  --  *      *       0.0.0.0/0            0.0.0.0/0            /* kube-proxy firewall rules */
5      13G 1218G KUBE-NODE-PORT  all  --  *      *       0.0.0.0/0            0.0.0.0/0            /* kubernetes health check rules */
6      13G 1218G KUBE-FIREWALL  all  --  *      *       0.0.0.0/0            0.0.0.0/0
7      13G 1218G cali-INPUT  all  --  *      *       0.0.0.0/0            0.0.0.0/0            /* cali:Cz_u1IQiXIMmKD4c */
root@node1022:~#



## after flushing iptables and recovering

# iptables -t filter -L INPUT -nv --line-numbers
# Warning: iptables-legacy tables present, use iptables-legacy to see them
Chain INPUT (policy ACCEPT 142M packets, 12G bytes)
num   pkts bytes target     prot opt in     out     source               destination
1     142M   12G KUBE-FIREWALL  all  --  *      *       0.0.0.0/0            0.0.0.0/0
2     1115  112K ACCEPT     udp  --  *      *       0.0.0.0/0            169.254.25.10        /* NodeLocal DNS Cache: allow DNS traffic */
3        0     0 ACCEPT     tcp  --  *      *       0.0.0.0/0            169.254.25.10        /* NodeLocal DNS Cache: allow DNS traffic */
4     142M   12G KUBE-IPVS-FILTER  all  --  *      *       0.0.0.0/0            0.0.0.0/0            /* kubernetes ipvs access filter */
5     142M   12G KUBE-PROXY-FIREWALL  all  --  *      *       0.0.0.0/0            0.0.0.0/0            /* kube-proxy firewall rules */
6     142M   12G KUBE-NODE-PORT  all  --  *      *       0.0.0.0/0            0.0.0.0/0            /* kubernetes health check rules */


Comparing these INPUT rules against a healthy node, the ordering is indeed different. Since the trace only ever records one filter INPUT rule being evaluated before the drop (the first rule that matches 0.0.0.0/0), the prime suspect is KUBE-IPVS-FILTER, which should probably not be sitting in third place.
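Before changing anything, one way to confirm which of these target chains actually eats the SYN is to zero the filter-table counters, reproduce the curl from the pod, and see which rules increment (a sketch; the chain names come from the listing above):

# zero the per-rule counters in the filter table
iptables -t filter -Z
# ...re-run curl -k https://10.233.0.1:443/healthz inside the pod...
# then check which INPUT targets matched and whether a DROP inside them counted up
iptables -t filter -L INPUT -nv --line-numbers
iptables -t filter -L KUBE-IPVS-FILTER -nv
iptables -t filter -L cali-INPUT -nv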

I then recovered by taking the restart route.

Recovery by restart

Flush all iptables rules (kube-proxy recreates its own rules automatically), then restart the calico-node daemon pods. Back the rules up first, as sketched below.
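Before flushing, dump the current rule set so the rollback step at the end of this section has something to restore from (a sketch; the file name simply mirrors the restore example below):

# back up the nft-backed tables to a timestamped file
iptables-save > /tmp/iptables-backup-$(date +%F).rules
# this node also has legacy tables, back those up as well
iptables-legacy-save > /tmp/iptables-legacy-backup-$(date +%F).rules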


# Flush the filter table (the default table)
iptables -F  # flush the rules in every chain
iptables -X  # delete all user-defined chains
iptables -Z  # zero all counters

# Flush the nat table (address-translation rules)
iptables -t nat -F
iptables -t nat -X
iptables -t nat -Z

# Flush the mangle table (packet-mangling rules)
iptables -t mangle -F
iptables -t mangle -X
iptables -t mangle -Z

# Flush the raw table (connection-tracking rules)
iptables -t raw -F
iptables -t raw -X
iptables -t raw -Z

# Flush the security table (SELinux-related rules)
iptables -t security -F
iptables -t security -X
iptables -t security -Z

Restoring the rules (if a rollback is needed)


# Restore the rules from the backup file
iptables-restore < /tmp/iptables-backup-xxxx-xx-xx.rules
# Restart the networking components so the restored state takes effect
kubectl rollout restart daemonset kube-proxy calico-node -n kube-system

Once more: never run the wholesale flush above in a production environment. If a specific rule is the problem, delete or edit just that rule in the relevant chain (e.g. iptables -D <chain> <rule-number>) rather than clearing everything; a hedged illustration follows.
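As an illustration of that targeted approach (the rule and insert positions below are hypothetical; always read the --line-numbers listing first):

# list the rules with their positions
iptables -t filter -L INPUT -nv --line-numbers
# delete only the offending rule by its number (3 is just an example)
iptables -t filter -D INPUT 3
# or put a jump back at the position where it belongs (6 is just an example)
iptables -t filter -I INPUT 6 -j KUBE-IPVS-FILTER -m comment --comment "kubernetes ipvs access filter"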