Symptom
```
root@server-01:~# kubectl logs -n kube-flannel kube-flannel-ds-g84gc -c kube-flannel -f
Error from server: Get "https://192.168.174.128:10250/containerLogs/kube-flannel/kube-flannel-ds-g84gc/kube-flannel?follow=true": proxyconnect tcp: dial tcp 192.168.43.196:1023: connect: connection refused
```
Explanation
When the control-plane VM was first initialized, kubeadm wrote the http_proxy and https_proxy environment variables that were set at the time into the apiserver manifest, so the apiserver routes its outbound traffic through that proxy. After the internal IP changed, the proxy address in the manifest still pointed at the old IP, producing the network errors above.
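A quick way to confirm this is to search the static-pod manifests for the baked-in proxy variables; a minimal check, assuming the default kubeadm manifest directory:

```bash
# List every static-pod manifest that still embeds proxy settings
grep -n "http_proxy\|https_proxy" /etc/kubernetes/manifests/*.yaml
```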
```
root@server-01:/etc/kubernetes# cat /etc/kubernetes/manifests/kube-apiserver.yaml
apiVersion: v1
kind: Pod
metadata:
  annotations:
    kubeadm.kubernetes.io/kube-apiserver.advertise-address.endpoint: 192.168.174.128:6443
  labels:
    component: kube-apiserver
    tier: control-plane
  name: kube-apiserver
  namespace: kube-system
spec:
  containers:
  - command:
    - kube-apiserver
    - --advertise-address=192.168.174.128
    - --allow-privileged=true
    - --authorization-mode=Node,RBAC
    - --client-ca-file=/etc/kubernetes/pki/ca.crt
    - --enable-admission-plugins=NodeRestriction
    - --enable-bootstrap-token-auth=true
    - --etcd-cafile=/etc/kubernetes/pki/etcd/ca.crt
    - --etcd-certfile=/etc/kubernetes/pki/apiserver-etcd-client.crt
    - --etcd-keyfile=/etc/kubernetes/pki/apiserver-etcd-client.key
    - --etcd-servers=https://127.0.0.1:2379
    - --kubelet-client-certificate=/etc/kubernetes/pki/apiserver-kubelet-client.crt
    - --kubelet-client-key=/etc/kubernetes/pki/apiserver-kubelet-client.key
    - --kubelet-preferred-address-types=InternalIP,ExternalIP,Hostname
    - --proxy-client-cert-file=/etc/kubernetes/pki/front-proxy-client.crt
    - --proxy-client-key-file=/etc/kubernetes/pki/front-proxy-client.key
    - --requestheader-allowed-names=front-proxy-client
    - --requestheader-client-ca-file=/etc/kubernetes/pki/front-proxy-ca.crt
    - --requestheader-extra-headers-prefix=X-Remote-Extra-
    - --requestheader-group-headers=X-Remote-Group
    - --requestheader-username-headers=X-Remote-User
    - --secure-port=6443
    - --service-account-issuer=https://kubernetes.default.svc.cluster.local
    - --service-account-key-file=/etc/kubernetes/pki/sa.pub
    - --service-account-signing-key-file=/etc/kubernetes/pki/sa.key
    - --service-cluster-ip-range=10.96.0.0/12
    - --tls-cert-file=/etc/kubernetes/pki/apiserver.crt
    - --tls-private-key-file=/etc/kubernetes/pki/apiserver.key
    env:
    - name: http_proxy
      value: http://192.168.43.196:1023
    - name: https_proxy
      value: http://192.168.43.196:1023
    image: registry.k8s.io/kube-apiserver:v1.34.1
    imagePullPolicy: IfNotPresent
    livenessProbe:
      failureThreshold: 8
      httpGet:
        host: 192.168.174.128
        path: /livez
        port: probe-port
        scheme: HTTPS
      initialDelaySeconds: 10
      periodSeconds: 10
      timeoutSeconds: 15
    name: kube-apiserver
    ports:
    - containerPort: 6443
      name: probe-port
      protocol: TCP
    readinessProbe:
      failureThreshold: 3
      httpGet:
        host: 192.168.174.128
        path: /readyz
        port: probe-port
        scheme: HTTPS
      periodSeconds: 1
      timeoutSeconds: 15
    resources:
      requests:
        cpu: 250m
    startupProbe:
      failureThreshold: 24
      httpGet:
        host: 192.168.174.128
        path: /livez
        port: probe-port
        scheme: HTTPS
      initialDelaySeconds: 10
      periodSeconds: 10
      timeoutSeconds: 15
    volumeMounts:
    - mountPath: /etc/ssl/certs
      name: ca-certs
      readOnly: true
    - mountPath: /etc/ca-certificates
      name: etc-ca-certificates
      readOnly: true
    - mountPath: /etc/kubernetes/pki
      name: k8s-certs
      readOnly: true
    - mountPath: /usr/local/share/ca-certificates
      name: usr-local-share-ca-certificates
      readOnly: true
    - mountPath: /usr/share/ca-certificates
      name: usr-share-ca-certificates
      readOnly: true
  hostNetwork: true
  priority: 2000001000
  priorityClassName: system-node-critical
  securityContext:
    seccompProfile:
      type: RuntimeDefault
  volumes:
  - hostPath:
      path: /etc/ssl/certs
      type: DirectoryOrCreate
    name: ca-certs
  - hostPath:
      path: /etc/ca-certificates
      type: DirectoryOrCreate
    name: etc-ca-certificates
  - hostPath:
      path: /etc/kubernetes/pki
      type: DirectoryOrCreate
    name: k8s-certs
  - hostPath:
      path: /usr/local/share/ca-certificates
      type: DirectoryOrCreate
    name: usr-local-share-ca-certificates
  - hostPath:
      path: /usr/share/ca-certificates
      type: DirectoryOrCreate
    name: usr-share-ca-certificates
status: {}
```
Fix
The apiserver should not actually need a proxy at all, so the fix is simply to remove the http_proxy and https_proxy environment variables from the manifest. After editing /etc/kubernetes/manifests/kube-apiserver.yaml, kubelet detects the change automatically and recreates the API server pod; no manual restart is needed.
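To confirm the restart actually happened, you can watch the container come back and check that the live pod spec no longer carries the proxy variables. A minimal sketch, assuming crictl is available on the node and the control-plane node is named server-01 (static pod names are suffixed with the node name):

```bash
# The kube-apiserver container should reappear with a new ID after the edit
crictl ps --name kube-apiserver

# Should print nothing once the proxy env vars are gone
kubectl -n kube-system get pod kube-apiserver-server-01 \
  -o jsonpath='{.spec.containers[0].env}'
```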
The same cleanup is needed in the manifests of these components (see the batch-cleanup sketch after this list):
- kube-controller-manager.yaml
- kube-scheduler.yaml
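If mikefarah's yq v4 happens to be installed on the node, all three manifests can be cleaned in one pass instead of editing each by hand; a sketch only, so back up the files first:

```bash
# Remove only the proxy entries, leaving any other env vars intact
for f in kube-apiserver kube-controller-manager kube-scheduler; do
  yq -i 'del(.spec.containers[0].env[] | select(.name == "http_proxy" or .name == "https_proxy"))' \
    "/etc/kubernetes/manifests/$f.yaml"
done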
The kube-proxy pods also inherited the environment variables that were set on the server at kubeadm init time, so the stale proxy configuration lives in the kube-proxy pods as well.
- Error output:
```
root@server-01:/etc/kubernetes# kubectl logs -n kube-system -l k8s-app=kube-proxy --tail=50
I1120 03:24:14.014466 1 server_linux.go:53] "Using iptables proxy"
I1120 03:24:14.152447 1 shared_informer.go:349] "Waiting for caches to sync" controller="node informer cache"
E1120 03:24:35.244357 1 reflector.go:205] "Failed to watch" err="failed to list *v1.Node: Get \"https://192.168.174.128:6443/api/v1/nodes?fieldSelector=metadata.name%3Dserver-02&limit=500&resourceVersion=0\": proxyconnect tcp: dial tcp 192.168.43.196:1023: connect: connection refused" logger="UnhandledError" reflector="k8s.io/client-go/informers/factory.go:160" type="*v1.Node"
E1120 03:24:57.721330 1 reflector.go:205] "Failed to watch" err="failed to list *v1.Node: Get \"https://192.168.174.128:6443/api/v1/nodes?fieldSelector=metadata.name%3Dserver-02&limit=500&resourceVersion=0\": proxyconnect tcp: dial tcp 192.168.43.196:1023: connect: connection refused" logger="UnhandledError" reflector="k8s.io/client-go/informers/factory.go:160" type="*v1.Node"
E1120 03:25:20.501926 1 reflector.go:205] "Failed to watch" err="failed to list *v1.Node: Get \"https://192.168.174.128:6443/api/v1/nodes?fieldSelector=metadata.name%3Dserver-02&limit=500&resourceVersion=0\": proxyconnect tcp: dial tcp 192.168.43.196:1023: connect: connection refused" logger="UnhandledError" reflector="k8s.io/client-go/informers/factory.go:160" type="*v1.Node"
```
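Before touching anything, you can confirm the DaemonSet template still carries the stale entries straight from the API:

```bash
# Prints NODE_NAME plus the inherited http_proxy/https_proxy entries
kubectl -n kube-system get ds kube-proxy \
  -o jsonpath='{.spec.template.spec.containers[0].env}'
```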
- Configuration fix. The old DaemonSet spec, showing the inherited proxy env:
```yaml
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: kube-proxy
  namespace: kube-system
  uid: a88cf688-e5bf-48ee-898a-6496454a1224
  resourceVersion: '18030'
  generation: 1
  creationTimestamp: '2025-10-30T08:45:43Z'
  labels:
    k8s-app: kube-proxy
  annotations:
    deprecated.daemonset.template.generation: '1'
  selfLink: /apis/apps/v1/namespaces/kube-system/daemonsets/kube-proxy
status:
  currentNumberScheduled: 4
  numberMisscheduled: 0
  desiredNumberScheduled: 4
  numberReady: 4
  observedGeneration: 1
  updatedNumberScheduled: 4
  numberAvailable: 4
spec:
  selector:
    matchLabels:
      k8s-app: kube-proxy
  template:
    metadata:
      labels:
        k8s-app: kube-proxy
    spec:
      volumes:
        - name: kube-proxy
          configMap:
            name: kube-proxy
            defaultMode: 420
        - name: xtables-lock
          hostPath:
            path: /run/xtables.lock
            type: FileOrCreate
        - name: lib-modules
          hostPath:
            path: /lib/modules
            type: ''
      containers:
        - name: kube-proxy
          image: registry.k8s.io/kube-proxy:v1.34.1
          command:
            - /usr/local/bin/kube-proxy
            - '--config=/var/lib/kube-proxy/config.conf'
            - '--hostname-override=$(NODE_NAME)'
          env:
            - name: NODE_NAME
              valueFrom:
                fieldRef:
                  apiVersion: v1
                  fieldPath: spec.nodeName
            - name: http_proxy
              value: http://192.168.43.196:1023
            - name: https_proxy
              value: http://192.168.43.196:1023
          resources: {}
          volumeMounts:
            - name: kube-proxy
              mountPath: /var/lib/kube-proxy
            - name: xtables-lock
              mountPath: /run/xtables.lock
            - name: lib-modules
              readOnly: true
              mountPath: /lib/modules
          terminationMessagePath: /dev/termination-log
          terminationMessagePolicy: File
          imagePullPolicy: IfNotPresent
          securityContext:
            privileged: true
      restartPolicy: Always
      terminationGracePeriodSeconds: 30
      dnsPolicy: ClusterFirst
      nodeSelector:
        kubernetes.io/os: linux
      serviceAccountName: kube-proxy
      serviceAccount: kube-proxy
      hostNetwork: true
      securityContext: {}
      schedulerName: default-scheduler
      tolerations:
        - operator: Exists
      priorityClassName: system-node-critical
  updateStrategy:
    type: RollingUpdate
    rollingUpdate:
      maxUnavailable: 1
      maxSurge: 0
  revisionHistoryLimit: 10
```
Edit the DaemonSet and delete the stale proxy env entries:

```bash
kubectl edit ds -n kube-system kube-proxy
```
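If you prefer a non-interactive fix, a JSON patch works too. A sketch that assumes the env order shown in the spec above (NODE_NAME at index 0, the proxy vars at 1 and 2); the higher index is removed first so the remaining position stays valid:

```bash
kubectl -n kube-system patch ds kube-proxy --type=json -p='[
  {"op": "remove", "path": "/spec/template/spec/containers/0/env/2"},
  {"op": "remove", "path": "/spec/template/spec/containers/0/env/1"}
]'
```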
Saving the edit triggers a rolling update of all kube-proxy pods, which takes a little while; once kube-proxy is healthy again, restarting the kube-flannel pods brings them back to normal as well.
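To watch the recovery, a minimal sketch, assuming the flannel DaemonSet keeps its default name kube-flannel-ds:

```bash
# Wait for every kube-proxy pod to be replaced
kubectl -n kube-system rollout status ds/kube-proxy

# Then bounce flannel and confirm its pods come back Ready
kubectl -n kube-flannel rollout restart ds/kube-flannel-ds
kubectl -n kube-flannel get pods -w
```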