After a Power Outage: Repairing the k8s Control Plane

I had assumed kubespray could repair this automatically, but after trying it, it clearly could not. Since etcd appeared to be healthy, rebuilding the static control-plane pods was enough.
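
Before rebuilding anything, it is worth double-checking that etcd really is healthy. A minimal sketch, assuming the default kubespray certificate layout under /etc/ssl/etcd/ssl/ (the exact .pem file names vary per node, so adjust before running):

# Run on a control-plane node; cert paths/names below are assumed
# kubespray defaults and may differ on your cluster.
ETCDCTL_API=3 etcdctl \
  --endpoints=https://127.0.0.1:2379 \
  --cacert=/etc/ssl/etcd/ssl/ca.pem \
  --cert=/etc/ssl/etcd/ssl/node-k8s-ctrl.pem \
  --key=/etc/ssl/etcd/ssl/node-k8s-ctrl-key.pem \
  endpoint health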

The symptom: every other pod is fine, but only the kube-* control-plane pods are failing their health checks, stuck at 0/1 READY (kgp below is a shell alias for kubectl get pods -A -o wide):



root@k8s-ctrl:~/kubespray1-31# kgp
NAMESPACE       NAME                                       READY   STATUS    RESTARTS   AGE   IP               NODE        NOMINATED NODE   READINESS GATES
ingress-nginx   ingress-nginx-controller-ntxl8             1/1     Running   0          29m   172.16.189.100   k8s-ctrl    <none>           <none>
ingress-nginx   ingress-nginx-controller-q8rv6             1/1     Running   0          29m   172.16.189.101   k8s-work1   <none>           <none>
ingress-nginx   ingress-nginx-controller-xs4h7             1/1     Running   0          29m   172.16.189.102   k8s-work2   <none>           <none>
kube-system     calico-kube-controllers-55d498b656-qm764   0/1     Running   0          29m   10.199.84.136    k8s-work2   <none>           <none>
kube-system     calico-node-cqvxd                          1/1     Running   0          29m   172.16.189.101   k8s-work1   <none>           <none>
kube-system     calico-node-ttj6t                          1/1     Running   0          29m   172.16.189.102   k8s-work2   <none>           <none>
kube-system     calico-node-wctnp                          1/1     Running   0          29m   172.16.189.100   k8s-ctrl    <none>           <none>
kube-system     coredns-69d6675447-8g6c6                   1/1     Running   0          29m   10.199.182.204   k8s-work1   <none>           <none>
kube-system     coredns-69d6675447-bwblj                   1/1     Running   0          29m   10.199.89.131    k8s-ctrl    <none>           <none>
kube-system     dns-autoscaler-5cb4578f5f-jqbjf            1/1     Running   0          29m   10.199.182.202   k8s-work1   <none>           <none>
kube-system     kube-apiserver-k8s-ctrl                    1/1     Running   1          29m   172.16.189.100   k8s-ctrl    <none>           <none>
kube-system     kube-apiserver-k8s-work1                   0/1     Running   1          29m   172.16.189.101   k8s-work1   <none>           <none>
kube-system     kube-apiserver-k8s-work2                   0/1     Running   1          29m   172.16.189.102   k8s-work2   <none>           <none>
kube-system     kube-controller-manager-k8s-ctrl           1/1     Running   2          29m   172.16.189.100   k8s-ctrl    <none>           <none>
kube-system     kube-controller-manager-k8s-work1          0/1     Running   3          29m   172.16.189.101   k8s-work1   <none>           <none>
kube-system     kube-controller-manager-k8s-work2          0/1     Running   2          29m   172.16.189.102   k8s-work2   <none>           <none>
kube-system     kube-proxy-6pvvl                           1/1     Running   0          29m   172.16.189.100   k8s-ctrl    <none>           <none>
kube-system     kube-proxy-cp9qj                           1/1     Running   0          29m   172.16.189.102   k8s-work2   <none>           <none>
kube-system     kube-proxy-xxkqr                           1/1     Running   0          29m   172.16.189.101   k8s-work1   <none>           <none>
kube-system     kube-scheduler-k8s-ctrl                    1/1     Running   1          29m   172.16.189.100   k8s-ctrl    <none>           <none>
kube-system     kube-scheduler-k8s-work1                   0/1     Running   1          29m   172.16.189.101   k8s-work1   <none>           <none>
kube-system     kube-scheduler-k8s-work2                   0/1     Running   1          29m   172.16.189.102   k8s-work2   <none>           <none>
kube-system     metrics-server-6c8bff4c-pzhzg              0/1     Running   0          29m   10.199.84.137    k8s-work2   <none>           <none>
kube-system     nodelocaldns-9wllx                         1/1     Running   0          29m   172.16.189.101   k8s-work1   <none>           <none>
kube-system     nodelocaldns-b79zv                         1/1     Running   0          29m   172.16.189.100   k8s-ctrl    <none>           <none>
kube-system     nodelocaldns-ldq85                         1/1     Running   0          29m   172.16.189.102   k8s-work2   <none>           <none>
kube-system     registry-n9tw8                             1/1     Running   0          29m   10.199.182.203   k8s-work1   <none>           <none>
root@k8s-ctrl:~/kubespray1-31#
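
To see why a given control-plane pod is unready, check its events and, on the node itself, the container logs. A quick sketch using the apiserver pod on k8s-work1 from the output above:

# Pod events (readiness probe failures show up here).
kubectl -n kube-system describe pod kube-apiserver-k8s-work1

# On the node: find the container and read its logs via the CRI.
crictl ps -a | grep kube-apiserver
crictl logs <container-id>   # substitute the ID printed above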

kubespray's own commands (these did not fix it for me):


# Re-run only the etcd-related tasks
ansible-playbook -i inventory/mycluster/hosts.yaml cluster.yml --tags etcd

# Re-run the full playbook limited to the three nodes
ansible-playbook -i inventory/mycluster/hosts.yaml cluster.yml --limit k8s-ctrl,k8s-work1,k8s-work2


The fix

Since etcd is intact, it is enough to remove the stale control-plane containers and let the kubelet recreate them from the static pod manifests:


# On each affected node: force-remove all kube-* containers; the kubelet
# recreates the control-plane containers from /etc/kubernetes/manifests.
crictl ps -a | grep kube- | awk '{print $1}' | xargs -r crictl rm -f

# Restart the container runtime and the kubelet on every node.
ansible all -m shell -a "systemctl restart containerd; systemctl restart kubelet"

# Recycle the remaining pods so their controllers recreate them cleanly.
kubectl delete po --all --all-namespaces
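
Afterwards, confirm the control plane has recovered (same alias as above, or plain kubectl):

# All three components should report 1/1 Running on every node.
kubectl -n kube-system get pods -o wide | grep -E 'kube-(apiserver|controller-manager|scheduler)'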