Kubernetes对Pod进行抓包

3,901 阅读12分钟

环境配置

# Deployment manifest: runs three replicas of the errm/cheese:cheddar image
# (container port 80/TCP) in the traefik namespace.
# apps/v1 is used because Deployments under extensions/v1beta1 are no longer
# served since Kubernetes 1.16.
apiVersion: apps/v1
kind: Deployment
metadata:
  labels:
    app: cheese
  name: cheddar
  namespace: traefik
spec:
  replicas: 3
  # apps/v1 requires an explicit selector; it must match the pod template labels below.
  selector:
    matchLabels:
      app: cheese
  template:
    metadata:
      labels:
        app: cheese
    spec:
      containers:
      - image: errm/cheese:cheddar
        imagePullPolicy: IfNotPresent
        name: cheese
        ports:
        - containerPort: 80
          protocol: TCP
      restartPolicy: Always

# Service manifest: enables session affinity and client source-IP preservation,
# and exposes the pods directly to the outside as a NodePort service.
apiVersion: v1
kind: Service
metadata:
  name: cheddar
  namespace: traefik
spec:
  clusterIP: 10.101.171.243
  # Local is set so that the pod sees the real client IP. Without this field the
  # default is externalTrafficPolicy: Cluster; a connection received on the node
  # port then has source NAT (SNAT) applied, so the pod sees a node-side address
  # instead of the client's address.
  externalTrafficPolicy: Local 
  ports:
  - name: http
    nodePort: 32604
    port: 80
    protocol: TCP
    targetPort: 80
  selector:
    app: cheese
  sessionAffinity: ClientIP  # enable session affinity; currently only client source-address affinity is supported
  sessionAffinityConfig:
    clientIP:
      timeoutSeconds: 10800 # the default value is 10800s
  type: NodePort

基础介绍

  • 本次实验kubernetes网络插件使用flannel。
  • flannel的CNI插件(委托bridge插件)为每个Pod创建一对veth虚拟设备,一端放在容器内作为pod的网卡(eth0),另一端挂在宿主机的cni0 bridge上。
  • pod网卡的iflink值等于宿主机上对应的veth网卡的ifindex值。宿主机上与pod无关的网卡,例如ens192(宿主机物理网卡)等,其iflink和ifindex的值是相等的。抓取pod接口的数据包,实际上就是在pod所在的宿主机上抓取对应veth*接口的数据包。
  • 客户端直接访问pod是不通的,本次实验中service以NodePort类型暴露在宿主机的端口上,客户端通过访问该端口来访问pod。
#通过brctl查看网桥信息,可以看到cni0下挂了很多veth接口
[root@k8s-node01 veth09b7438a]#  brctl show
bridge name     bridge id               STP enabled     interfaces
cni0            8000.92e01cf960fd       no              veth09b7438a
                                                        veth2fc2c1b6
                                                        veth4cabed1c
                                                        veth7e1054b7
                                                        vethc294c850
docker0         8000.0242fafc97d0       no              veth9bab9e7
                                                        vetha8c2590
virbr0          8000.525400c841b1       yes             virbr0-nic

#查看宿主机的路由,出接口为cni0的路由通往宿主机本地的pod
#出接口为flannel.1的路由通往其他宿主机的pod
[root@k8s-node01 veth09b7438a]# route -n
Kernel IP routing table
Destination     Gateway         Genmask         Flags Metric Ref    Use Iface
0.0.0.0         192.168.1.1     0.0.0.0         UG    100    0        0 ens192
10.244.0.0      10.244.0.0      255.255.255.0   UG    0      0        0 flannel.1
10.244.1.0      0.0.0.0         255.255.255.0   U     0      0        0 cni0
10.244.2.0      10.244.2.0      255.255.255.0   UG    0      0        0 flannel.1
10.244.3.0      10.244.3.0      255.255.255.0   UG    0      0        0 flannel.1
10.244.5.0      10.244.5.0      255.255.255.0   UG    0      0        0 flannel.1
172.17.0.0      0.0.0.0         255.255.0.0     U     0      0        0 docker0
192.168.1.0     0.0.0.0         255.255.255.0   U     100    0        0 ens192
192.168.122.0   0.0.0.0         255.255.255.0   U     0      0        0 virbr0

#查看宿主机的网卡信息,cni0的地址为10.244.1.1,后面的抓包会看到这个地址:在externalTrafficPolicy为默认值Cluster的情况下,外部客户端通过NodePort方式访问pod时,node会对客户端地址做SNAT,将源地址转换成cni0的地址,然后再去和pod通信。
[root@k8s-node01 veth09b7438a]# ifconfig
cni0: flags=4163<UP,BROADCAST,RUNNING,MULTICAST>  mtu 1450
        inet 10.244.1.1  netmask 255.255.255.0  broadcast 0.0.0.0
        ether 92:e0:1c:f9:60:fd  txqueuelen 1000  (Ethernet)
        RX packets 8681993  bytes 523607971 (499.3 MiB)
        RX errors 0  dropped 0  overruns 0  frame 0
        TX packets 12391563  bytes 1394170401 (1.2 GiB)
        TX errors 0  dropped 0 overruns 0  carrier 0  collisions 0

docker0: flags=4163<UP,BROADCAST,RUNNING,MULTICAST>  mtu 1500
        inet 172.17.0.1  netmask 255.255.0.0  broadcast 172.17.255.255
        ether 02:42:fa:fc:97:d0  txqueuelen 0  (Ethernet)
        RX packets 0  bytes 0 (0.0 B)
        RX errors 0  dropped 0  overruns 0  frame 0
        TX packets 0  bytes 0 (0.0 B)
        TX errors 0  dropped 0 overruns 0  carrier 0  collisions 0

ens192: flags=4163<UP,BROADCAST,RUNNING,MULTICAST>  mtu 1500
        inet 192.168.1.20  netmask 255.255.255.0  broadcast 192.168.1.255
        ether 00:50:56:8b:56:40  txqueuelen 1000  (Ethernet)
        RX packets 146189166  bytes 25848535308 (24.0 GiB)
        RX errors 0  dropped 3192  overruns 0  frame 0
        TX packets 30908753  bytes 3503761108 (3.2 GiB)
        TX errors 0  dropped 0 overruns 0  carrier 0  collisions 0

flannel.1: flags=4163<UP,BROADCAST,RUNNING,MULTICAST>  mtu 1450
        inet 10.244.1.0  netmask 255.255.255.255  broadcast 0.0.0.0
        ether 2a:fc:21:95:d1:22  txqueuelen 0  (Ethernet)
        RX packets 9682333  bytes 533679556 (508.9 MiB)
        RX errors 0  dropped 0  overruns 0  frame 0
        TX packets 5696298  bytes 338960372 (323.2 MiB)
        TX errors 0  dropped 17 overruns 0  carrier 0  collisions 0

lo: flags=73<UP,LOOPBACK,RUNNING>  mtu 65536
        inet 127.0.0.1  netmask 255.0.0.0
        loop  txqueuelen 1000  (Local Loopback)
        RX packets 11286977  bytes 990397432 (944.5 MiB)
        RX errors 0  dropped 0  overruns 0  frame 0
        TX packets 11286977  bytes 990397432 (944.5 MiB)
        TX errors 0  dropped 0 overruns 0  carrier 0  collisions 0

veth09b7438a: flags=4163<UP,BROADCAST,RUNNING,MULTICAST>  mtu 1450
        ether 76:04:a5:b3:2b:e4  txqueuelen 0  (Ethernet)
        RX packets 609  bytes 122621 (119.7 KiB)
        RX errors 0  dropped 0  overruns 0  frame 0
        TX packets 824  bytes 75438 (73.6 KiB)
        TX errors 0  dropped 0 overruns 0  carrier 0  collisions 0

veth2fc2c1b6: flags=4163<UP,BROADCAST,RUNNING,MULTICAST>  mtu 1450
        ether 96:0b:d0:9e:48:6e  txqueuelen 0  (Ethernet)
        RX packets 1  bytes 42 (42.0 B)
        RX errors 0  dropped 0  overruns 0  frame 0
        TX packets 54995  bytes 2316982 (2.2 MiB)
        TX errors 0  dropped 0 overruns 0  carrier 0  collisions 0

veth4cabed1c: flags=4163<UP,BROADCAST,RUNNING,MULTICAST>  mtu 1450
        ether 56:b8:c9:82:fe:d8  txqueuelen 0  (Ethernet)
        RX packets 1  bytes 42 (42.0 B)
        RX errors 0  dropped 0  overruns 0  frame 0
        TX packets 7  bytes 394 (394.0 B)
        TX errors 0  dropped 0 overruns 0  carrier 0  collisions 0

veth7e1054b7: flags=4163<UP,BROADCAST,RUNNING,MULTICAST>  mtu 1450
        ether 4a:37:54:93:bf:52  txqueuelen 0  (Ethernet)
        RX packets 1  bytes 42 (42.0 B)
        RX errors 0  dropped 0  overruns 0  frame 0
        TX packets 7  bytes 394 (394.0 B)
        TX errors 0  dropped 0 overruns 0  carrier 0  collisions 0

veth9bab9e7: flags=4163<UP,BROADCAST,RUNNING,MULTICAST>  mtu 1500
        ether 0a:4a:20:a7:1f:d0  txqueuelen 0  (Ethernet)
        RX packets 0  bytes 0 (0.0 B)
        RX errors 0  dropped 0  overruns 0  frame 0
        TX packets 0  bytes 0 (0.0 B)
        TX errors 0  dropped 0 overruns 0  carrier 0  collisions 0

vetha8c2590: flags=4163<UP,BROADCAST,RUNNING,MULTICAST>  mtu 1500
        ether 66:3b:fd:6d:1d:33  txqueuelen 0  (Ethernet)
        RX packets 0  bytes 0 (0.0 B)
        RX errors 0  dropped 0  overruns 0  frame 0
        TX packets 0  bytes 0 (0.0 B)
        TX errors 0  dropped 0 overruns 0  carrier 0  collisions 0

vethc294c850: flags=4163<UP,BROADCAST,RUNNING,MULTICAST>  mtu 1450
        ether 46:3d:cd:01:49:02  txqueuelen 0  (Ethernet)
        RX packets 1  bytes 42 (42.0 B)
        RX errors 0  dropped 0  overruns 0  frame 0
        TX packets 54984  bytes 2316520 (2.2 MiB)
        TX errors 0  dropped 0 overruns 0  carrier 0  collisions 0

virbr0: flags=4099<UP,BROADCAST,MULTICAST>  mtu 1500
        inet 192.168.122.1  netmask 255.255.255.0  broadcast 192.168.122.255
        ether 52:54:00:c8:41:b1  txqueuelen 1000  (Ethernet)
        RX packets 0  bytes 0 (0.0 B)
        RX errors 0  dropped 0  overruns 0  frame 0
        TX packets 0  bytes 0 (0.0 B)
        TX errors 0  dropped 0 overruns 0  carrier 0  collisions 0

第一步:在master节点上列出待抓包的pod及分布的node的情况

下面的步骤以node01上的cheddar-845749dbd6-zcdjm这个pod为例,其余两个pod方法一样。

#在Kubernetes master节点上执行以下命令
[root@k8s-master01 ~]# kubectl get pod -n traefik -o wide
NAME                                          READY   STATUS        RESTARTS   AGE     IP             NODE         NOMINATED NODE   READINESS GATES
cheddar-845749dbd6-8zf5x                      1/1     Running       0          69m     10.244.2.10    k8s-node02   <none>           <none>
cheddar-845749dbd6-wjpb6                      1/1     Running       0          69m     10.244.5.185   k8s-node03   <none>           <none>
cheddar-845749dbd6-zcdjm                      1/1     Running       0          69m     10.244.1.132   k8s-node01   <none>           <none>

第二步:获取pod的iflink id

方式一:

#在Kubernetes master节点上执行以下命令
[root@k8s-master01 veth4387343]# kubectl exec -n traefik cheddar-845749dbd6-zcdjm  -- cat "/sys/class/net/eth0/iflink" 
111

方式二:

#1.在master上获取Container ID
[root@k8s-master01 ~]# kubectl describe pod -n traefik  cheddar-845749dbd6-zcdjm  
Name:           cheddar-845749dbd6-zcdjm
Namespace:      traefik
Priority:       0
Node:           k8s-node01/192.168.1.20
Start Time:     Tue, 31 Mar 2020 09:16:40 +0800
Labels:         app=cheese
                pod-template-hash=845749dbd6
                task=cheddar
                version=v0.0.1
Annotations:    <none>
Status:         Running
IP:             10.244.1.132
Controlled By:  ReplicaSet/cheddar-845749dbd6
Containers:
  cheese:
    Container ID:   docker://625a392af42a809896fb2942f4cfa27c589786a2db6626d583a49178d3e56575
    Image:          errm/cheese:cheddar
    Image ID:       docker://sha256:05cbb2fce885009ab9540ecef52d54ffd5829e6a2700058258e36f8762267f9b
    Port:           80/TCP
    Host Port:      0/TCP
    State:          Running
      Started:      Tue, 31 Mar 2020 09:16:47 +0800
    Ready:          True
    Restart Count:  0
    Limits:
      cpu:     100m
      memory:  50Mi
    Requests:
      cpu:        100m
      memory:     50Mi
    Environment:  <none>
    Mounts:
      /var/run/secrets/kubernetes.io/serviceaccount from default-token-cfnpp (ro)
Conditions:
  Type              Status
  Initialized       True 
  Ready             True 
  ContainersReady   True 
  PodScheduled      True 
Volumes:
  default-token-cfnpp:
    Type:        Secret (a volume populated by a Secret)
    SecretName:  default-token-cfnpp
    Optional:    false
QoS Class:       Guaranteed
Node-Selectors:  <none>
Tolerations:     node.kubernetes.io/not-ready:NoExecute for 300s
                 node.kubernetes.io/unreachable:NoExecute for 300s
Events:          <none>


#2.根据上一步得到的Container ID,在对应的node上通过docker命令查看
[root@k8s-node01 ~]# docker ps |  grep  625a392a  #过滤条件为前面找到的Container ID的前面一部分
625a392af42a        05cbb2fce885             "nginx -g 'daemon of…"   4 hours ago         Up 4 hours                              k8s_cheese_cheddar-845749dbd6-zcdjm_traefik_862b98ff-bdf2-4398-86c9-a5492b7085c6_0
[root@k8s-node01 ~]# docker exec  625a392af42a sh -c "cat /sys/class/net/eth0/iflink"
111

第三步:在node上遍历veth开头的网卡,找到ifindex值与前面找到的iflink匹配的网卡

# Run on the node. 111 is the pod's iflink value found in step 2.
# grep -x matches the whole line, so other ifindex values that merely contain
# "111" (e.g. 1110 or 2111) are not reported as false positives.
[root@k8s-node01 ~]# for i in /sys/class/net/veth*/ifindex; do grep -lx 111 "$i"; done
/sys/class/net/veth09b7438a/ifindex

第四步:在node上通过tcpdump -i 网卡名字 命令来抓包

#在node节点执行以下命令
[root@k8s-node01 ~]# tcpdump  -i veth09b7438a dst port 80 -n
tcpdump: verbose output suppressed, use -v or -vv for full protocol decode
listening on veth09b7438a, link-type EN10MB (Ethernet), capture size 262144 bytes

验证externalTrafficPolicy参数

  • 客户端地址为192.168.1.35
  • node01物理接口地址为:192.168.1.20
  • node01 cni0接口地址为:10.244.1.1
  • 在客户端通过curl http://192.168.1.20:32604 命令访问。NodePort类型的service在宿主机上开启了32604端口的监听,当客户端访问32604端口时,流量会被转发(负载均衡)到service关联的pod。

不加externalTrafficPolicy参数的情况下: 抓包和查看pod日志看到的源地址都是node01的cni0接口地址,说明node先对源地址做了SNAT再去访问pod。

#10.244.1.1为node01的cni0接口地址
[root@k8s-node01 veth09b7438a]# tcpdump -i veth09b7438a  dst port 80 -n
10:21:56.883197 IP 10.244.1.1.61273 > 10.244.1.132.http: Flags [SEW], seq 3221112694, win 65535, options [mss 1260,nop,wscale 6,nop,nop,TS val 705826773 ecr 0,sackOK,eol], length 0
10:21:56.907770 IP 10.244.1.1.61273 > 10.244.1.132.http: Flags [.], ack 969928634, win 2067, options [nop,nop,TS val 705826795 ecr 2145591097], length 0
10:21:56.907858 IP 10.244.1.1.61273 > 10.244.1.132.http: Flags [P.], seq 0:82, ack 1, win 2067, options [nop,nop,TS val 705826795 ecr 2145591097], length 82: HTTP: GET / HTTP/1.1
10:21:56.930678 IP 10.244.1.1.61273 > 10.244.1.132.http: Flags [.], ack 239, win 2063, options [nop,nop,TS val 705826819 ecr 2145591122], length 0
10:21:56.935045 IP 10.244.1.1.61273 > 10.244.1.132.http: Flags [.], ack 756, win 2055, options [nop,nop,TS val 705826823 ecr 2145591122], length 0
10:21:56.935761 IP 10.244.1.1.61273 > 10.244.1.132.http: Flags [F.], seq 82, ack 756, win 2055, options [nop,nop,TS val 705826823 ecr 2145591122], length 0
#kubectl logs的-f参数用于持续输出日志
[root@k8s-master01 ~]# kubectl logs -f -n traefik cheddar-845749dbd6-zcdjm  
10.244.1.1 - - [31/Mar/2020:02:21:57 +0000] "GET / HTTP/1.1" 200 517 "-" "curl/7.64.1" "-"
10.244.1.1 - - [31/Mar/2020:02:21:57 +0000] "GET / HTTP/1.1" 200 517 "-" "curl/7.64.1" "-"

添加externalTrafficPolicy:Local参数的情况下: 抓包和查看pod日志都可以看到源地址为客户端的ip地址。

[root@k8s-node01 veth09b7438a]# tcpdump -i veth09b7438a  dst port 80 -n
10:22:16.145978 IP 192.168.1.35.61334 > 10.244.1.132.http: Flags [SEW], seq 3383422782, win 65535, options [mss 1260,nop,wscale 6,nop,nop,TS val 705845725 ecr 0,sackOK,eol], length 0
10:22:16.169527 IP 192.168.1.35.61334 > 10.244.1.132.http: Flags [.], ack 337281323, win 2067, options [nop,nop,TS val 705845749 ecr 2145610360], length 0
10:22:16.169553 IP 192.168.1.35.61334 > 10.244.1.132.http: Flags [P.], seq 0:82, ack 1, win 2067, options [nop,nop,TS val 705845749 ecr 2145610360], length 82: HTTP: GET / HTTP/1.1
10:22:16.194224 IP 192.168.1.35.61334 > 10.244.1.132.http: Flags [.], ack 239, win 2063, options [nop,nop,TS val 705845772 ecr 2145610384], length 0
10:22:16.194270 IP 192.168.1.35.61334 > 10.244.1.132.http: Flags [.], ack 756, win 2055, options [nop,nop,TS val 705845772 ecr 2145610384], length 0
10:22:16.194495 IP 192.168.1.35.61334 > 10.244.1.132.http: Flags [F.], seq 82, ack 756, win 2055, options [nop,nop,TS val 705845773 ecr 2145610384], length 0
[root@k8s-master01 ~]# kubectl logs -f -n traefik cheddar-845749dbd6-zcdjm  
192.168.1.35 - - [31/Mar/2020:02:22:16 +0000] "GET / HTTP/1.1" 200 517 "-" "curl/7.64.1" "-"
192.168.1.35 - - [31/Mar/2020:02:22:16 +0000] "GET / HTTP/1.1" 200 517 "-" "curl/7.64.1" "-"