Problem symptoms
# Inside the pod
root@node1021:~# k exec -it -n las-daemon-create-image image-68f0e27d94890244f60e53d2-rvlvn -- bash
root@image-68f0e27d94890244f60e53d2-rvlvn:~# ifconfig
eth0: flags=4163<UP,BROADCAST,RUNNING,MULTICAST> mtu 1480
inet 10.199.103.181 netmask 255.255.255.255 broadcast 0.0.0.0
inet6 fe80::687f:33ff:feb5:220c prefixlen 64 scopeid 0x20<link>
ether 6a:7f:33:b5:22:0c txqueuelen 0 (Ethernet)
RX packets 3 bytes 444 (444.0 B)
RX errors 0 dropped 2 overruns 0 frame 0
TX packets 19 bytes 1434 (1.4 KB)
TX errors 0 dropped 1 overruns 0 carrier 0 collisions 0
lo: flags=73<UP,LOOPBACK,RUNNING> mtu 65536
inet 127.0.0.1 netmask 255.0.0.0
inet6 ::1 prefixlen 128 scopeid 0x10<host>
loop txqueuelen 1000 (Local Loopback)
RX packets 0 bytes 0 (0.0 B)
RX errors 0 dropped 0 overruns 0 frame 0
TX packets 0 bytes 0 (0.0 B)
TX errors 0 dropped 0 overruns 0 carrier 0 collisions 0
# The Service cannot be reached from inside the pod
root@image-68f0e27d94890244f60e53d2-rvlvn:~# curl -k https://10.233.0.1:443/healthz
command terminated with exit code 137
root@node1021:~#
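Before tracing packet drops, it can help to rule out the Service itself: if the VIP answers from the node but not from the Pod, the fault is on the Pod-to-host path. A minimal sketch (run on the node; the ipvsadm check only applies if kube-proxy runs in IPVS mode, which the KUBE-IPVS-FILTER chain later in this note suggests):
# From the node: does the API Server Service VIP answer at all?
curl -k --connect-timeout 5 https://10.233.0.1:443/healthz
# IPVS mode only: confirm the VIP has real servers behind it
ipvsadm -Ln -t 10.233.0.1:443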
Tracing the drop location
root@node1022:~# nettrace -p tcp --addr 10.199.103.181
begin trace...
***************** 3baec600,3baec6e0,3baec6e0,3baec6e0,3baec6e0,3baec6e0 ***************
[9868408.922840] [__tcp_transmit_skb ] TCP: 10.199.103.181:50940 -> 10.233.0.1:443 seq:2763303023, ack:0, flags:S
[9868408.922853] [skb_clone ] unknow
[9868408.922859] [__ip_queue_xmit ] TCP: 10.199.103.181:50940 -> 10.233.0.1:443 seq:2763303023, ack:0, flags:S
[9868408.922862] [__ip_local_out ] TCP: 10.199.103.181:50940 -> 10.233.0.1:443 seq:2763303023, ack:0, flags:S
[9868408.922865] [ip_output ] TCP: 10.199.103.181:50940 -> 10.233.0.1:443 seq:2763303023, ack:0, flags:S
[9868408.922868] [nf_hook_slow ] TCP: 10.199.103.181:50940 -> 10.233.0.1:443 seq:2763303023, ack:0, flags:S *ipv4 in chain: POST_ROUTING*
[9868408.922871] [ip_finish_output ] TCP: 10.199.103.181:50940 -> 10.233.0.1:443 seq:2763303023, ack:0, flags:S
[9868408.922874] [ip_finish_output2 ] TCP: 10.199.103.181:50940 -> 10.233.0.1:443 seq:2763303023, ack:0, flags:S
[9868408.922877] [__dev_queue_xmit ] TCP: 10.199.103.181:50940 -> 10.233.0.1:443 seq:2763303023, ack:0, flags:S
[9868408.922880] [dev_hard_start_xmit ] TCP: 10.199.103.181:50940 -> 10.233.0.1:443 seq:2763303023, ack:0, flags:S *skb is successfully sent to the NIC driver*
[9868408.922884] [enqueue_to_backlog ] TCP: 10.199.103.181:50940 -> 10.233.0.1:443 seq:2763303023, ack:0, flags:S
[9868408.922888] [__netif_receive_skb_core] TCP: 10.199.103.181:50940 -> 10.233.0.1:443 seq:2763303023, ack:0, flags:S
[9868408.922891] [packet_rcv ] TCP: 10.199.103.181:50940 -> 10.233.0.1:443 seq:2763303023, ack:0, flags:S
[9868408.922894] [ip_rcv ] TCP: 10.199.103.181:50940 -> 10.233.0.1:443 seq:2763303023, ack:0, flags:S
[9868408.922897] [ip_rcv_core ] TCP: 10.199.103.181:50940 -> 10.233.0.1:443 seq:2763303023, ack:0, flags:S
[9868408.922899] [nf_hook_slow ] TCP: 10.199.103.181:50940 -> 10.233.0.1:443 seq:2763303023, ack:0, flags:S *ipv4 in chain: PRE_ROUTING*
[9868408.922902] [ipt_do_table ] TCP: 10.199.103.181:50940 -> 10.233.0.1:443 seq:2763303023, ack:0, flags:S *iptables table:raw, chain:PRE_ROUTING*
[9868408.922911] [nft_do_chain ] TCP: 10.199.103.181:50940 -> 10.233.0.1:443 seq:2763303023, ack:0, flags:S *iptables table:raw, chain:PREROUT*
[9868408.922921] [nft_do_chain ] TCP: 10.199.103.181:50940 -> 10.233.0.1:443 seq:2763303023, ack:0, flags:S *iptables table:mangle, chain:PREROUT*
[9868408.922926] [ipt_do_table ] TCP: 10.199.103.181:50940 -> 10.233.0.1:443 seq:2763303023, ack:0, flags:S *iptables table:mangle, chain:PRE_ROUTING*
[9868408.922928] [ipt_do_table ] TCP: 10.199.103.181:50940 -> 10.233.0.1:443 seq:2763303023, ack:0, flags:S *iptables table:nat, chain:PRE_ROUTING*
[9868408.922931] [nft_do_chain ] TCP: 10.199.103.181:50940 -> 10.233.0.1:443 seq:2763303023, ack:0, flags:S *iptables table:nat, chain:PREROUT*
[9868408.922939] [ip_route_input_slow ] TCP: 10.199.103.181:50940 -> 10.233.0.1:443 seq:2763303023, ack:0, flags:S
[9868408.922942] [fib_validate_source ] TCP: 10.199.103.181:50940 -> 10.233.0.1:443 seq:2763303023, ack:0, flags:S
[9868408.922946] [ip_local_deliver ] TCP: 10.199.103.181:50940 -> 10.233.0.1:443 seq:2763303023, ack:0, flags:S
[9868408.922947] [nf_hook_slow ] TCP: 10.199.103.181:50940 -> 10.233.0.1:443 seq:2763303023, ack:0, flags:S *ipv4 in chain: INPUT*
[9868408.922949] [ipt_do_table ] TCP: 10.199.103.181:50940 -> 10.233.0.1:443 seq:2763303023, ack:0, flags:S *iptables table:mangle, chain:INPUT*
[9868408.922951] [ipt_do_table ] TCP: 10.199.103.181:50940 -> 10.233.0.1:443 seq:2763303023, ack:0, flags:S *iptables table:filter, chain:INPUT*
[9868408.922956] [nft_do_chain ] TCP: 10.199.103.181:50940 -> 10.233.0.1:443 seq:2763303023, ack:0, flags:S *iptables table:filter, chain:INPUT*
[9868408.922968] [kfree_skb ] TCP: 10.199.103.181:50940 -> 10.233.0.1:443 seq:2763303023, ack:0, flags:S *nf_hook_slow+0x9b* *packet is dropped by kernel*
[9868409.930984] [__tcp_retransmit_skb] TCP: 10.199.103.181:50940 -> 10.233.0.1:443 seq:2763303023, ack:0, flags:S
[9868409.930995] [__tcp_transmit_skb ] TCP: 10.199.103.181:50940 -> 10.233.0.1:443 seq:2763303023, ack:0, flags:S
[9868409.930998] [skb_clone ] unknow
[9868409.931006] [__ip_queue_xmit ] TCP: 10.199.103.181:50940 -> 10.233.0.1:443 seq:2763303023, ack:0, flags:S
[9868409.931011] [__ip_local_out ] TCP: 10.199.103.181:50940 -> 10.233.0.1:443 seq:2763303023, ack:0, flags:S
[9868409.931014] [ip_output ] TCP: 10.199.103.181:50940 -> 10.233.0.1:443 seq:2763303023, ack:0, flags:S
[9868409.931018] [nf_hook_slow ] TCP: 10.199.103.181:50940 -> 10.233.0.1:443 seq:2763303023, ack:0, flags:S *ipv4 in chain: POST_ROUTING*
[9868409.931020] [ip_finish_output ] TCP: 10.199.103.181:50940 -> 10.233.0.1:443 seq:2763303023, ack:0, flags:S
[9868409.931023] [ip_finish_output2 ] TCP: 10.199.103.181:50940 -> 10.233.0.1:443 seq:2763303023, ack:0, flags:S
[9868409.931027] [__dev_queue_xmit ] TCP: 10.199.103.181:50940 -> 10.233.0.1:443 seq:2763303023, ack:0, flags:S
[9868409.931030] [dev_hard_start_xmit ] TCP: 10.199.103.181:50940 -> 10.233.0.1:443 seq:2763303023, ack:0, flags:S *skb is successfully sent to the NIC driver*
[9868409.931034] [enqueue_to_backlog ] TCP: 10.199.103.181:50940 -> 10.233.0.1:443 seq:2763303023, ack:0, flags:S
[9868409.931044] [__netif_receive_skb_core] TCP: 10.199.103.181:50940 -> 10.233.0.1:443 seq:2763303023, ack:0, flags:S
[9868409.931050] [packet_rcv ] TCP: 10.199.103.181:50940 -> 10.233.0.1:443 seq:2763303023, ack:0, flags:S
[9868409.931053] [ip_rcv ] TCP: 10.199.103.181:50940 -> 10.233.0.1:443 seq:2763303023, ack:0, flags:S
[9868409.931058] [ip_rcv_core ] TCP: 10.199.103.181:50940 -> 10.233.0.1:443 seq:2763303023, ack:0, flags:S
[9868409.931060] [nf_hook_slow ] TCP: 10.199.103.181:50940 -> 10.233.0.1:443 seq:2763303023, ack:0, flags:S *ipv4 in chain: PRE_ROUTING*
[9868409.931063] [ipt_do_table ] TCP: 10.199.103.181:50940 -> 10.233.0.1:443 seq:2763303023, ack:0, flags:S *iptables table:raw, chain:PRE_ROUTING*
[9868409.931074] [nft_do_chain ] TCP: 10.199.103.181:50940 -> 10.233.0.1:443 seq:2763303023, ack:0, flags:S *iptables table:raw, chain:PREROUT*
[9868409.931086] [nft_do_chain ] TCP: 10.199.103.181:50940 -> 10.233.0.1:443 seq:2763303023, ack:0, flags:S *iptables table:mangle, chain:PREROUT*
[9868409.931095] [ipt_do_table ] TCP: 10.199.103.181:50940 -> 10.233.0.1:443 seq:2763303023, ack:0, flags:S *iptables table:mangle, chain:PRE_ROUTING*
[9868409.931098] [ipt_do_table ] TCP: 10.199.103.181:50940 -> 10.233.0.1:443 seq:2763303023, ack:0, flags:S *iptables table:nat, chain:PRE_ROUTING*
[9868409.931101] [nft_do_chain ] TCP: 10.199.103.181:50940 -> 10.233.0.1:443 seq:2763303023, ack:0, flags:S *iptables table:nat, chain:PREROUT*
[9868409.931109] [ip_route_input_slow ] TCP: 10.199.103.181:50940 -> 10.233.0.1:443 seq:2763303023, ack:0, flags:S
[9868409.931114] [fib_validate_source ] TCP: 10.199.103.181:50940 -> 10.233.0.1:443 seq:2763303023, ack:0, flags:S
[9868409.931118] [ip_local_deliver ] TCP: 10.199.103.181:50940 -> 10.233.0.1:443 seq:2763303023, ack:0, flags:S
[9868409.931122] [nf_hook_slow ] TCP: 10.199.103.181:50940 -> 10.233.0.1:443 seq:2763303023, ack:0, flags:S *ipv4 in chain: INPUT*
[9868409.931124] [ipt_do_table ] TCP: 10.199.103.181:50940 -> 10.233.0.1:443 seq:2763303023, ack:0, flags:S *iptables table:mangle, chain:INPUT*
[9868409.931125] [ipt_do_table ] TCP: 10.199.103.181:50940 -> 10.233.0.1:443 seq:2763303023, ack:0, flags:S *iptables table:filter, chain:INPUT*
[9868409.931131] [nft_do_chain ] TCP: 10.199.103.181:50940 -> 10.233.0.1:443 seq:2763303023, ack:0, flags:S *iptables table:filter, chain:INPUT*
[9868409.931145] [kfree_skb ] TCP: 10.199.103.181:50940 -> 10.233.0.1:443 seq:2763303023, ack:0, flags:S *nf_hook_slow+0x9b* *packet is dropped by kernel*
[9868411.946983] [__tcp_retransmit_skb] TCP: 10.199.103.181:50940 -> 10.233.0.1:443 seq:2763303023, ack:0, flags:S
[9868411.946994] [__tcp_transmit_skb ] TCP: 10.199.103.181:50940 -> 10.233.0.1:443 seq:2763303023, ack:0, flags:S
[9868411.946996] [skb_clone ] unknow
Analysis
iptables and NetworkPolicy investigation
From the trace, the core problem is that the TCP SYN packets sent by the Pod are dropped by the kernel in the node's iptables filter INPUT chain, so the Pod can never establish a connection to 10.233.0.1 (the Kubernetes API Server Service address).
1. Pinpointing the key observations
The nettrace output establishes two core facts:
- The packet is sent successfully: dev_hard_start_xmit is annotated with "skb is successfully sent to the NIC driver", so the SYN emitted by the Pod has reached the node's network stack.
- The packet is dropped by the kernel: the trace ends with kfree_skb annotated "packet is dropped by kernel", and the drop occurs in iptables table:filter, chain:INPUT, i.e. the node's iptables filter rules reject the packet.
The curl exit code 137 inside the Pod (128 + SIGKILL, i.e. the process was forcibly terminated, for example by an OOM kill or an explicit kill) is simply what happens after the connection hangs and the process is reaped; it is a downstream effect of the drop, not the root cause.
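To reproduce the symptom without waiting for the process to be killed, the connection attempt can be bounded explicitly (a minimal sketch; the VIP and path are the ones used above, and exit code 28 is curl's connect-timeout code):
# Inside the pod: fail fast instead of hanging
curl -k --connect-timeout 5 -m 10 https://10.233.0.1:443/healthz; echo "exit=$?"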
2. Likely causes of the drop
Given these observations, the drop can only be caused by two classes of rules, listed from most to least likely:
- Node iptables filter INPUT chain rules
  - Kubernetes nodes process traffic through iptables by default; if the filter table's INPUT chain contains a rule that rejects 10.199.103.181:50940 -> 10.233.0.1:443 (for example with a DROP target), the packet is discarded outright.
  - Check whether custom iptables rules or a security component (firewalld, ufw, etc.) has added a blocking policy.
- Kubernetes NetworkPolicy rules
  - If the namespace las-daemon-create-image, or the namespace behind 10.233.0.1 (the API Server), carries a NetworkPolicy that forbids the Pod from reaching port 443 of the API Server, the drop is triggered indirectly through iptables.
  - NetworkPolicy restrictions are rendered by the CNI into node iptables rules and ultimately take effect via the filter INPUT chain (the cali-INPUT sub-chain in the listing further below); the policies in the namespace can be listed as shown right after this list.
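A quick way to check the NetworkPolicy side (a sketch; only the namespace name is taken from the Pod above, and the policy name placeholder is hypothetical):
# List NetworkPolicies that could restrict Pods in this namespace
kubectl get networkpolicy -n las-daemon-create-image
# Inspect a specific policy in detail
kubectl describe networkpolicy <policy-name> -n las-daemon-create-image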
3. Troubleshooting and resolution steps
Step 1: check the node's iptables filter INPUT chain
On the node where nettrace was run (node1022), run the following and look for reject rules:
# List the full filter INPUT chain in rule order (pay attention to DROP/REJECT rules)
iptables -t filter -L INPUT -nv --line-numbers
# If there are too many rules, filter for those mentioning 10.199.103.181 or 10.233.0.1
iptables -t filter -L INPUT -nv | grep -E "10.199.103.181|10.233.0.1"
- If a DROP rule referencing 10.199.103.181 (the Pod IP) or 10.233.0.1:443 is found, determine where it came from (added manually or generated by a component) and adjust it to match the intended behaviour (for example change it to ACCEPT, or delete it). The sub-chains that INPUT jumps to can be inspected the same way, as in the sketch below.
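For instance, the kube-proxy and Calico sub-chains that show up in the listing below can be dumped individually (a sketch; the chain names are the ones present on this node):
# Dump the sub-chains referenced from INPUT, with counters and rule numbers
iptables -t filter -L KUBE-IPVS-FILTER -nv --line-numbers
iptables -t filter -L cali-INPUT -nv --line-numbers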
The iptables INPUT rules on the affected node:
root@node1022:~# iptables -t filter -L INPUT -nv --line-numbers
# Warning: iptables-legacy tables present, use iptables-legacy to see them
Chain INPUT (policy ACCEPT 0 packets, 0 bytes)
num pkts bytes target prot opt in out source destination
1 49435 4319K ACCEPT udp -- * * 0.0.0.0/0 169.254.25.10 /* NodeLocal DNS Cache: allow DNS traffic */
2 0 0 ACCEPT tcp -- * * 0.0.0.0/0 169.254.25.10 /* NodeLocal DNS Cache: allow DNS traffic */
3 13G 1218G KUBE-IPVS-FILTER all -- * * 0.0.0.0/0 0.0.0.0/0 /* kubernetes ipvs access filter */
4 13G 1218G KUBE-PROXY-FIREWALL all -- * * 0.0.0.0/0 0.0.0.0/0 /* kube-proxy firewall rules */
5 13G 1218G KUBE-NODE-PORT all -- * * 0.0.0.0/0 0.0.0.0/0 /* kubernetes health check rules */
6 13G 1218G KUBE-FIREWALL all -- * * 0.0.0.0/0 0.0.0.0/0
7 13G 1218G cali-INPUT all -- * * 0.0.0.0/0 0.0.0.0/0 /* cali:Cz_u1IQiXIMmKD4c */
root@node1022:~#
## After flushing iptables and letting the rules rebuild
# iptables -t filter -L INPUT -nv --line-numbers
# Warning: iptables-legacy tables present, use iptables-legacy to see them
Chain INPUT (policy ACCEPT 142M packets, 12G bytes)
num pkts bytes target prot opt in out source destination
1 142M 12G KUBE-FIREWALL all -- * * 0.0.0.0/0 0.0.0.0/0
2 1115 112K ACCEPT udp -- * * 0.0.0.0/0 169.254.25.10 /* NodeLocal DNS Cache: allow DNS traffic */
3 0 0 ACCEPT tcp -- * * 0.0.0.0/0 169.254.25.10 /* NodeLocal DNS Cache: allow DNS traffic */
4 142M 12G KUBE-IPVS-FILTER all -- * * 0.0.0.0/0 0.0.0.0/0 /* kubernetes ipvs access filter */
5 142M 12G KUBE-PROXY-FIREWALL all -- * * 0.0.0.0/0 0.0.0.0/0 /* kube-proxy firewall rules */
6 142M 12G KUBE-NODE-PORT all -- * * 0.0.0.0/0 0.0.0.0/0 /* kubernetes health check rules */
Comparing these INPUT rules with a healthy node, the ordering is indeed different. Since the trace shows only a single filter INPUT rule being evaluated before the drop (the first catch-all 0.0.0.0/0 rule), the prime suspect is that KUBE-IPVS-FILTER should not be sitting in third position. This can be cross-checked by watching the per-rule counters while reproducing the failure, as in the sketch below.
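A hedged way to confirm which INPUT rule the SYN packets are hitting (run on the node while the curl from the Pod is retried, and watch which counters increase):
# Refresh the INPUT chain counters every second while reproducing the failure
watch -n1 'iptables -t filter -L INPUT -nv --line-numbers'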
The rules were then restored by following the restart approach.
Recovery by restart
Flush all iptables rules, let kube-proxy recreate its chains automatically, then restart the Calico daemonset Pods. Back up the current rules before flushing, as in the sketch below.
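Saving the current rule set first makes a rollback possible (a minimal sketch; the backup path is only illustrative):
# Save every table to a timestamped file before touching the rules
iptables-save > /tmp/iptables-backup-$(date +%F).rules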
# Flush the filter table (the default table)
iptables -F    # flush all rules in every chain
iptables -X    # delete all user-defined chains
iptables -Z    # reset all counters
# Flush the nat table (address-translation rules)
iptables -t nat -F
iptables -t nat -X
iptables -t nat -Z
# Flush the mangle table (packet-mangling rules)
iptables -t mangle -F
iptables -t mangle -X
iptables -t mangle -Z
# Flush the raw table (connection-tracking related rules)
iptables -t raw -F
iptables -t raw -X
iptables -t raw -Z
# Flush the security table (SELinux-related rules)
iptables -t security -F
iptables -t security -X
iptables -t security -Z
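After the flush, kube-proxy repopulates its KUBE-* chains on its next sync; restarting the Calico node agents makes them reprogram the cali-* chains (a sketch; the calico-node daemonset in kube-system is the one referenced in the rollback command below):
# Restart the Calico node agents so they reinstall their iptables chains
kubectl -n kube-system rollout restart daemonset calico-node
# Verify the INPUT chain has been repopulated in the expected order
iptables -t filter -L INPUT -nv --line-numbers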
Restoring the rules (if a rollback is needed)
# Restore the rules from the backup file
iptables-restore < /tmp/iptables-backup-xxxx-xx-xx.rules
# Restart the networking components so the restored rules take effect
kubectl rollout restart daemonset kube-proxy calico-node -n kube-system
To emphasise once more: never run the full flush above in a production environment. If a specific rule is the problem, delete or modify just that rule in the affected chain (e.g. with iptables -D <chain> <rule-number>) instead of wiping everything, as in the sketch below.
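A sketch of such a targeted fix, driven by the rule numbers printed with --line-numbers (the rule numbers and the re-insert position here are illustrative, not taken from the node above):
# Delete a single blocking/misplaced rule by its number in the INPUT chain
iptables -t filter -D INPUT 3
# Re-insert a rule at a specific position if only its ordering was wrong
iptables -t filter -I INPUT 4 -m comment --comment "kubernetes ipvs access filter" -j KUBE-IPVS-FILTER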