k8s 部署 metrics-server

429 阅读4分钟

yaml文件

# 下载文件
wget https://github.com/kubernetes-sigs/metrics-server/releases/latest/download/components.yaml

修改yaml文件的镜像地址

apiVersion: v1
kind: ServiceAccount
metadata:
  labels:
    k8s-app: metrics-server
  name: metrics-server
  namespace: kube-system
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  labels:
    k8s-app: metrics-server
    rbac.authorization.k8s.io/aggregate-to-admin: "true"
    rbac.authorization.k8s.io/aggregate-to-edit: "true"
    rbac.authorization.k8s.io/aggregate-to-view: "true"
  name: system:aggregated-metrics-reader
rules:
- apiGroups:
  - metrics.k8s.io
  resources:
  - pods
  - nodes
  verbs:
  - get
  - list
  - watch
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  labels:
    k8s-app: metrics-server
  name: system:metrics-server
rules:
- apiGroups:
  - ""
  resources:
  - nodes/metrics
  verbs:
  - get
- apiGroups:
  - ""
  resources:
  - pods
  - nodes
  verbs:
  - get
  - list
  - watch
---
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
  labels:
    k8s-app: metrics-server
  name: metrics-server-auth-reader
  namespace: kube-system
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: Role
  name: extension-apiserver-authentication-reader
subjects:
- kind: ServiceAccount
  name: metrics-server
  namespace: kube-system
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  labels:
    k8s-app: metrics-server
  name: metrics-server:system:auth-delegator
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: system:auth-delegator
subjects:
- kind: ServiceAccount
  name: metrics-server
  namespace: kube-system
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  labels:
    k8s-app: metrics-server
  name: system:metrics-server
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: system:metrics-server
subjects:
- kind: ServiceAccount
  name: metrics-server
  namespace: kube-system
---
apiVersion: v1
kind: Service
metadata:
  labels:
    k8s-app: metrics-server
  name: metrics-server
  namespace: kube-system
spec:
  ports:
  - name: https
    port: 443
    protocol: TCP
    targetPort: https
  selector:
    k8s-app: metrics-server
---
apiVersion: apps/v1
kind: Deployment
metadata:
  labels:
    k8s-app: metrics-server
  name: metrics-server
  namespace: kube-system
spec:
  selector:
    matchLabels:
      k8s-app: metrics-server
  strategy:
    rollingUpdate:
      maxUnavailable: 0
  template:
    metadata:
      labels:
        k8s-app: metrics-server
    spec:
      containers:
      - args:
        - --cert-dir=/tmp
        - --secure-port=4443
        - --kubelet-preferred-address-types=InternalIP,ExternalIP,Hostname
        - --kubelet-use-node-status-port
        - --metric-resolution=15s
        image: registry.aliyuncs.com/google_containers/metrics-server:v0.6.3  # 此处镜像需要修改为阿里云的
        imagePullPolicy: IfNotPresent
        livenessProbe:
          failureThreshold: 3
          httpGet:
            path: /livez
            port: https
            scheme: HTTPS
          periodSeconds: 10
        name: metrics-server
        ports:
        - containerPort: 4443
          name: https
          protocol: TCP
        readinessProbe:
          failureThreshold: 3
          httpGet:
            path: /readyz
            port: https
            scheme: HTTPS
          initialDelaySeconds: 20
          periodSeconds: 10
        resources:
          requests:
            cpu: 100m
            memory: 200Mi
        securityContext:
          allowPrivilegeEscalation: false
          readOnlyRootFilesystem: true
          runAsNonRoot: true
          runAsUser: 1000
        volumeMounts:
        - mountPath: /tmp
          name: tmp-dir
      nodeSelector:
        kubernetes.io/os: linux
      priorityClassName: system-cluster-critical
      serviceAccountName: metrics-server
      volumes:
      - emptyDir: {}
        name: tmp-dir
---
apiVersion: apiregistration.k8s.io/v1
kind: APIService
metadata:
  labels:
    k8s-app: metrics-server
  name: v1beta1.metrics.k8s.io
spec:
  group: metrics.k8s.io
  groupPriorityMinimum: 100
  insecureSkipTLSVerify: true
  service:
    name: metrics-server
    namespace: kube-system
  version: v1beta1
  versionPriority: 100

应用yaml文件

kubectl apply -f components.yaml

查看pod发现没有正常运行

image.png

查看pod日志,发现有错误

E0517 11:49:50.701971       1 scraper.go:140] "Failed to scrape node" err="Get \"https://172.16.0.3:10250/metrics/resource\": x509: cannot validate certificate for 172.16.0.3 because it doesn't contain any IP SANs" node="cloudcone"
I0517 11:49:56.597379       1 server.go:187] "Failed probe" probe="metric-storage-ready" err="no metrics to serve"

解决办法一

# 给 metrics-server 传递 --kubelet-insecure-tls 参数(注意是两个连字符)以禁用对 kubelet 证书的校验(不建议在生产环境中使用)
# 执行命令
kubectl edit deployment.apps/metrics-server -n kube-system
# 将 --kubelet-insecure-tls 添加到 spec.template.spec.containers[0].args(YAML 路径)下的列表中即可
spec:
      containers:
      - args:
        - --cert-dir=/tmp
        - --secure-port=4443
        - --kubelet-insecure-tls
        - --kubelet-preferred-address-types=InternalIP,ExternalIP,Hostname
        - --kubelet-use-node-status-port
        image: registry.cn-hangzhou.aliyuncs.com/ks8_kk/metrics-server:v0.4.2
        imagePullPolicy: IfNotPresent
        livenessProbe:

解决办法二:启用 kubelet 服务端证书的 TLS Bootstrap 签发(serverTLSBootstrap)

在各个节点修改kubelet的配置文件:vim /var/lib/kubelet/config.yaml

在配置文件的顶层添加如下内容:serverTLSBootstrap: true

image.png

# 重启kubelet
systemctl restart kubelet.service
查看证书签名请求(CSR),此时状态为 Pending
[root@master dashboard-2.7.0]# kubectl get csr
NAME        AGE     SIGNERNAME                      REQUESTOR               REQUESTEDDURATION   CONDITION
csr-4tw7j   5m2s    kubernetes.io/kubelet-serving   system:node:master      <none>              Pending
csr-9hwml   2m33s   kubernetes.io/kubelet-serving   system:node:node2       <none>              Pending
csr-b48k9   3m45s   kubernetes.io/kubelet-serving   system:node:master2     <none>              Pending
csr-fqndz   62s     kubernetes.io/kubelet-serving   system:node:node5       <none>              Pending
csr-lnv7q   2m3s    kubernetes.io/kubelet-serving   system:node:node3       <none>              Pending
csr-w79ww   96s     kubernetes.io/kubelet-serving   system:node:node4       <none>              Pending
csr-xmj8c   3m13s   kubernetes.io/kubelet-serving   system:node:node1       <none>              Pending
csr-ztk97   16s     kubernetes.io/kubelet-serving   system:node:cloudcone   <none>              Pending
批准这些证书签名请求
[root@master dashboard-2.7.0]# kubectl certificate approve csr-4tw7j csr-9hwml csr-b48k9 csr-fqndz csr-lnv7q csr-w79ww  csr-xmj8c csr-ztk97 
certificatesigningrequest.certificates.k8s.io/csr-4tw7j approved
certificatesigningrequest.certificates.k8s.io/csr-9hwml approved
certificatesigningrequest.certificates.k8s.io/csr-b48k9 approved
certificatesigningrequest.certificates.k8s.io/csr-fqndz approved
certificatesigningrequest.certificates.k8s.io/csr-lnv7q approved
certificatesigningrequest.certificates.k8s.io/csr-w79ww approved
certificatesigningrequest.certificates.k8s.io/csr-xmj8c approved
certificatesigningrequest.certificates.k8s.io/csr-ztk97 approved
再次查看证书签名请求,状态已变为 Approved,Issued
[root@master dashboard-2.7.0]# kubectl get csr
NAME        AGE     SIGNERNAME                      REQUESTOR               REQUESTEDDURATION   CONDITION
csr-4tw7j   7m31s   kubernetes.io/kubelet-serving   system:node:master      <none>              Approved,Issued
csr-9hwml   5m2s    kubernetes.io/kubelet-serving   system:node:node2       <none>              Approved,Issued
csr-b48k9   6m14s   kubernetes.io/kubelet-serving   system:node:master2     <none>              Approved,Issued
csr-fqndz   3m31s   kubernetes.io/kubelet-serving   system:node:node5       <none>              Approved,Issued
csr-lnv7q   4m32s   kubernetes.io/kubelet-serving   system:node:node3       <none>              Approved,Issued
csr-w79ww   4m5s    kubernetes.io/kubelet-serving   system:node:node4       <none>              Approved,Issued
csr-xmj8c   5m42s   kubernetes.io/kubelet-serving   system:node:node1       <none>              Approved,Issued
csr-ztk97   2m45s   kubernetes.io/kubelet-serving   system:node:cloudcone   <none>              Approved,Issued

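问题报错二:就绪探针失败(Readiness probe failed,状态码 500)

解决办法和前面一样(添加 --kubelet-insecure-tls 参数,或启用 serverTLSBootstrap 并批准 CSR),Pod 事件如下: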

  Normal   Scheduled  2m27s               default-scheduler  Successfully assigned kube-system/metrics-server-6cff795575-d2wlg to node2
  Normal   Pulling    2m27s               kubelet            Pulling image "registry.aliyuncs.com/google_containers/metrics-server:v0.6.4"
  Normal   Pulled     2m21s               kubelet            Successfully pulled image "registry.aliyuncs.com/google_containers/metrics-server:v0.6.4" in 6.108s (6.108s including waiting)
  Normal   Created    2m20s               kubelet            Created container metrics-server
  Normal   Started    2m20s               kubelet            Started container metrics-server
  Warning  Unhealthy  7s (x13 over 117s)  kubelet            Readiness probe failed: HTTP probe failed with statuscode: 500

一些常用资源查看命令

# 查看节点资源占用情况
[root@master dashboard-2.7.0]# kubectl top node
NAME        CPU(cores)   CPU%   MEMORY(bytes)   MEMORY%   
cloudcone   244m         12%    1465Mi          85%       
master      156m         7%     2422Mi          66%       
master2     35m          3%     1156Mi          66%       
node1       140m         7%     2153Mi          59%       
node2       113m         5%     2270Mi          63%       
node3       106m         5%     1470Mi          78%       
node4       164m         8%     1812Mi          49%       
node5       300m         15%    2484Mi          68% 
# 查看pod资源占用情况,使用-n指定名称空间
[root@master dashboard-2.7.0]# kubectl top pod -n k8s-util
NAME                       CPU(cores)   MEMORY(bytes)   
k8sutil-deployment-2tcqs   0m           1Mi             
k8sutil-deployment-9bsks   0m           1Mi             
k8sutil-deployment-hsb9w   0m           0Mi             
k8sutil-deployment-k5475   0m           0Mi             
k8sutil-deployment-kfnpq   0m           0Mi             
k8sutil-deployment-wgzcn   0m           0Mi             
k8sutil-deployment-zn64b   0m           0Mi     

参考文档