04 Deploying a Highly Available Kubernetes 1.21 Cluster with kubeadm


Most of the steps are the same as the single-master deployment described earlier.

Deployment environment preparation

The operating system is CentOS 7u9. Prepare five nodes with the following configuration:

IP               CPU   Memory   Disk   Role            Hostname
192.168.91.181   2C    2G       40GB   master          master01
192.168.91.182   2C    2G       40GB   master          master02
192.168.91.183   2C    2G       40GB   master          master03
192.168.91.184   2C    2G       40GB   worker (node)   worker01
192.168.91.185   2C    2G       40GB   worker (node)   worker02

In addition, master01 and master02 are used to run haproxy and keepalived.

Perform the following operations on all k8s nodes:

# Basic configuration
cat >> /etc/hosts << EOF
192.168.91.181  master01
192.168.91.182  master02
192.168.91.183  master03
192.168.91.184  worker01
192.168.91.185  worker02
EOF
yum -y install ntpdate
echo "0 */1 * * * ntpdate time1.aliyun.com" >> /var/spool/cron/root
systemctl disable firewalld && systemctl stop firewalld
sed -ri 's/SELINUX=enforcing/SELINUX=disabled/' /etc/selinux/config

# Upgrade the OS kernel
rpm --import https://www.elrepo.org/RPM-GPG-KEY-elrepo.org
yum -y install https://www.elrepo.org/elrepo-release-7.el7.elrepo.noarch.rpm
yum --enablerepo="elrepo-kernel" -y install kernel-ml.x86_64
grub2-set-default 0
grub2-mkconfig -o /boot/grub2/grub.cfg
reboot

# Enable kernel forwarding and bridge filtering
cat > /etc/sysctl.d/k8s.conf << EOF
net.bridge.bridge-nf-call-ip6tables = 1
net.bridge.bridge-nf-call-iptables = 1
net.ipv4.ip_forward = 1
vm.swappiness = 0
EOF
modprobe br_netfilter
sysctl -p /etc/sysctl.d/k8s.conf

# Install ipset and ipvsadm
yum -y install ipset ipvsadm
cat > /etc/sysconfig/modules/ipvs.modules << EOF
#!/bin/bash
modprobe -- ip_vs
modprobe -- ip_vs_rr
modprobe -- ip_vs_wrr
modprobe -- ip_vs_sh
modprobe -- nf_conntrack
EOF
chmod 755 /etc/sysconfig/modules/ipvs.modules && bash /etc/sysconfig/modules/ipvs.modules
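
# Optional check: confirm the IPVS-related modules are actually loaded
# (should list ip_vs, ip_vs_rr, ip_vs_wrr, ip_vs_sh and nf_conntrack)
lsmod | grep -e ip_vs -e nf_conntrack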

# Disable the swap partition
sed -i 's&/dev/mapper/centos-swap&#/dev/mapper/centos-swap&' /etc/fstab
swapoff -a

# Install Docker
wget -O /etc/yum.repos.d/docker-ce.repo https://mirrors.aliyun.com/docker-ce/linux/centos/docker-ce.repo
yum -y install --setopt=obsoletes=0 docker-ce-20.10.9-3.el7
mkdir /etc/docker
cat << EOF > /etc/docker/daemon.json
{
  "registry-mirrors": ["https://zwyx2n3v.mirror.aliyuncs.com"],
  "exec-opts": ["native.cgroupdriver=systemd"]
}
EOF
systemctl enable docker && systemctl start docker
# Reboot
reboot
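
After the nodes come back up, it is worth confirming that Docker reports the systemd cgroup driver, since the kubelet will be configured to match it later:

# Expected output: Cgroup Driver: systemd
docker info 2>/dev/null | grep -i "cgroup driver"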

Mutual SSH trust between nodes

Generate the SSH key pair on the master node and copy it to the other nodes. After the copy completes, the nodes can log in to each other for testing.

# master01
ssh-keygen
cp ~/.ssh/id_rsa.pub ~/.ssh/authorized_keys
# Answer yes and enter the password at each prompt
for i in 2 3 4 5; do scp -r /root/.ssh root@192.168.91.18$i:/root/; done
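
Once the keys are in place, a quick loop from master01 can confirm that passwordless login works everywhere (hostnames rely on the /etc/hosts entries added earlier):

# Each node should print its own hostname without asking for a password
for host in master02 master03 worker01 worker02; do ssh root@$host hostname; done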

HAProxy and Keepalived deployment

HAProxy

# On master01 and master02

yum -y install haproxy
# Replace the haproxy configuration
mv /etc/haproxy/haproxy.cfg /etc/haproxy/haproxy.cfg.back
cat > /etc/haproxy/haproxy.cfg << "EOF"
global
  maxconn  2000
  ulimit-n  16384
  log  127.0.0.1 local0 err
  stats timeout 30s

defaults
  log global
  mode  http
  option  httplog
  timeout connect 5000
  timeout client  50000
  timeout server  50000
  timeout http-request 15s
  timeout http-keep-alive 15s

frontend monitor-in
  bind *:33305
  mode http
  option httplog
  monitor-uri /monitor

frontend k8s-master
  bind 0.0.0.0:16443
  bind 127.0.0.1:16443
  mode tcp
  option tcplog
  tcp-request inspect-delay 5s
  default_backend k8s-master

backend k8s-master
  mode tcp
  option tcplog
  option tcp-check
  balance roundrobin
  default-server inter 10s downinter 5s rise 2 fall 2 slowstart 60s maxconn 250 maxqueue 256 weight 100
  server master01   192.168.91.181:6443  check
  server master02   192.168.91.182:6443  check
  server master03   192.168.91.183:6443  check
EOF
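
# Optional: check the configuration file for syntax errors before starting the service
haproxy -c -f /etc/haproxy/haproxy.cfg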

systemctl enable haproxy;systemctl start haproxy

curl master01:33305/monitor
curl master02:33305/monitor
<html><body><h1>200 OK</h1>
Service ready.
</body></html>


Keepalived

# On master01 and master02

yum -y install keepalived
# Replace the keepalived configuration
mv /etc/keepalived/keepalived.conf /etc/keepalived/keepalived.conf.back
cat > /etc/keepalived/keepalived.conf << "EOF"
! Configuration File for keepalived
global_defs {
    router_id LVS_DEVEL
    script_user root
    enable_script_security
}
vrrp_script chk_apiserver {
    script "/etc/keepalived/check_apiserver.sh" # 此脚本需要多独定义,并要调用
    interval 5
    weight -5
    fall 2
    rise 1
}
vrrp_instance VI_1 {
    state MASTER
    interface ens33 # change to the NIC actually in use
    mcast_src_ip 192.168.91.181 # the IP address of this master host
    virtual_router_id 51
    priority 101
    advert_int 2
    authentication {
        auth_type PASS
        auth_pass abc123
    }
    virtual_ipaddress {
        192.168.91.100 # the VIP address
    }
    track_script {
       chk_apiserver # run the apiserver check script defined above
    }
}
EOF

cat > /etc/keepalived/check_apiserver.sh << "EOF"
#!/bin/bash

err=0
for k in $(seq 1 3)
do
    check_code=$(pgrep haproxy)
    if [[ $check_code == "" ]]; then
        err=$(expr $err + 1)
        sleep 1
        continue
    else
        err=0
        break
    fi
done

if [[ $err != "0" ]]; then
    echo "systemctl stop keepalived"
    /usr/bin/systemctl stop keepalived
    exit 1
else
    exit 0
fi
EOF

chmod +x /etc/keepalived/check_apiserver.sh

# On master02, adjust the configuration separately
sed -i 's/192.168.91.181/192.168.91.182/' /etc/keepalived/keepalived.conf
sed -i 's/priority 101/priority 99/' /etc/keepalived/keepalived.conf

# On master01 and master02
systemctl enable keepalived;systemctl start keepalived

# Verify that the HA setup works
# master01
ip a s ens33
2: ens33: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast state UP group default qlen 1000
    link/ether 00:0c:29:d7:04:3b brd ff:ff:ff:ff:ff:ff
    inet 192.168.91.181/24 brd 192.168.91.255 scope global noprefixroute ens33
       valid_lft forever preferred_lft forever
    inet 192.168.91.100/32 scope global ens33
       valid_lft forever preferred_lft forever
    inet6 fe80::8ef0:ab61:8b17:dc27/64 scope link noprefixroute
       valid_lft forever preferred_lft forever

ss -anput | grep ":16443"
tcp    LISTEN     0      2000      *:16443                 *:*                   users:(("haproxy",pid=2049,fd=5))
tcp    LISTEN     0      2000   127.0.0.1:16443                 *:*                   users:(("haproxy",pid=2049,fd=6))

# master02
ss -anput | grep ":16443"
tcp    LISTEN     0      2000      *:16443                 *:*                   users:(("haproxy",pid=1963,fd=5))
tcp    LISTEN     0      2000   127.0.0.1:16443                 *:*                   users:(("haproxy",pid=1963,fd=6))
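
To exercise the failover path, haproxy can be stopped on master01; the check script then stops keepalived and the VIP should move to master02. This is an optional test (a sketch; remember to restore both services afterwards):

# master01: simulate a haproxy failure
systemctl stop haproxy
# within roughly 15s check_apiserver.sh stops keepalived and releases the VIP
systemctl status keepalived --no-pager

# master02: the VIP should now be bound here
ip a s ens33 | grep 192.168.91.100

# master01: restore the services; the VIP returns because master01 has the higher priority
systemctl start haproxy && systemctl start keepalived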

Kubernetes 1.21 cluster deployment

Component   Version   Installed on        Purpose
kubeadm     1.21.0    all cluster hosts   initializes and manages the cluster
kubelet     1.21.0    all cluster hosts   receives instructions from the api-server and manages the pod lifecycle
kubectl     1.21.0    all cluster hosts   command-line tool for managing cluster applications

Perform the following on all of the nodes prepared above.

Preparation

# Aliyun YUM repository
cat > /etc/yum.repos.d/kubernetes.repo << EOF 
[kubernetes]
name=Kubernetes
baseurl=https://mirrors.aliyun.com/kubernetes/yum/repos/kubernetes-el7-x86_64/
enabled=1
gpgcheck=1
repo_gpgcheck=1
gpgkey=https://mirrors.aliyun.com/kubernetes/yum/doc/yum-key.gpg https://mirrors.aliyun.com/kubernetes/yum/doc/rpm-package-key.gpg
EOF

# Install the specified versions
yum -y install --setopt=obsoletes=0 kubeadm-1.21.0-0  kubelet-1.21.0-0 kubectl-1.21.0-0
# Configure kubelet
sed -ri 's/KUBELET_EXTRA_ARGS=/KUBELET_EXTRA_ARGS="--cgroup-driver=systemd"/' /etc/sysconfig/kubelet
systemctl enable kubelet

# Prepare the cluster images
cat > image_download.sh << "EOF"
#!/bin/bash
images_list='
k8s.gcr.io/kube-apiserver:v1.21.0
k8s.gcr.io/kube-controller-manager:v1.21.0
k8s.gcr.io/kube-scheduler:v1.21.0
k8s.gcr.io/kube-proxy:v1.21.0
k8s.gcr.io/pause:3.4.1
k8s.gcr.io/etcd:3.4.13-0
k8s.gcr.io/coredns/coredns:v1.8.0'
for image in $images_list
do
    # k8s.gcr.io images cannot be pulled from mainland China; switch the registry to registry.aliyuncs.com/google_containers
    image_aliyun=`echo $image | sed 's#k8s.gcr.io#registry.aliyuncs.com/google_containers#'`
    # coredns needs special handling: drop the extra coredns/ path segment
    image_aliyun=`echo $image_aliyun | sed 's#coredns/##'`
    # Pull the image from Aliyun
    docker pull $image_aliyun
    # Re-tag it with the original k8s.gcr.io name
    docker tag $image_aliyun $image
    # Remove the Aliyun-tagged image
    docker rmi $image_aliyun
done
EOF

# Run the script to download the images
sh image_download.sh
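
If the script finished without errors, the seven re-tagged images should now be available locally:

# All images should be listed under the k8s.gcr.io name
docker images | grep k8s.gcr.io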

Initialize the cluster on master01

Prepare the kubeadm-config.yaml configuration file:

apiVersion: kubeadm.k8s.io/v1beta2
bootstrapTokens:
- groups:
  - system:bootstrappers:kubeadm:default-node-token
  token: 7t2weq.bjbawausm0jaxury
  ttl: 24h0m0s
  usages:
  - signing
  - authentication
kind: InitConfiguration
localAPIEndpoint:
  advertiseAddress: 192.168.91.181
  bindPort: 6443
nodeRegistration:
  criSocket: /var/run/dockershim.sock
  name: master01
  taints:
  - effect: NoSchedule
    key: node-role.kubernetes.io/master
---
apiServer:
  certSANs:
  - 192.168.91.100
  timeoutForControlPlane: 4m0s
apiVersion: kubeadm.k8s.io/v1beta2
certificatesDir: /etc/kubernetes/pki
clusterName: kubernetes
controlPlaneEndpoint: 192.168.91.100:16443
controllerManager: {}
dns:
  type: CoreDNS
etcd:
  local:
    dataDir: /var/lib/etcd
imageRepository: k8s.gcr.io
kind: ClusterConfiguration
kubernetesVersion: v1.21.0
networking:
  dnsDomain: cluster.local
  podSubnet: 10.244.0.0/16
  serviceSubnet: 10.96.0.0/12
scheduler: {}
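
Before running the real initialization, the configuration can optionally be sanity-checked with a dry run, which renders the manifests without changing the host (a sketch, assuming the file was saved as /root/kubeadm-config.yaml):

# No changes are applied; configuration errors surface here
kubeadm init --config /root/kubeadm-config.yaml --dry-run
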
# master01
# Keep the following output; it is needed in later steps
kubeadm init --config /root/kubeadm-config.yaml --upload-certs
Your Kubernetes control-plane has initialized successfully!

To start using your cluster, you need to run the following as a regular user:

  mkdir -p $HOME/.kube
  sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
  sudo chown $(id -u):$(id -g) $HOME/.kube/config

Alternatively, if you are the root user, you can run:

  export KUBECONFIG=/etc/kubernetes/admin.conf

You should now deploy a pod network to the cluster.
Run "kubectl apply -f [podnetwork].yaml" with one of the options listed at:
  https://kubernetes.io/docs/concepts/cluster-administration/addons/

You can now join any number of the control-plane node running the following command on each as root:

  kubeadm join 192.168.91.100:16443 --token 7t2weq.bjbawausm0jaxury \
        --discovery-token-ca-cert-hash sha256:0280fdb7b465ff2f0d0e5b408fba1ff61b406558e2584015f866b83f08720740 \
        --control-plane --certificate-key 5f37642b8eed975cb5fba10bcdc7696890ca00d7952c042b0ecb6d78d7d6989a

Please note that the certificate-key gives access to cluster sensitive data, keep it secret!
As a safeguard, uploaded-certs will be deleted in two hours; If necessary, you can use
"kubeadm init phase upload-certs --upload-certs" to reload certs afterward.

Then you can join any number of worker nodes by running the following on each as root:

kubeadm join 192.168.91.100:16443 --token 7t2weq.bjbawausm0jaxury \
        --discovery-token-ca-cert-hash sha256:0280fdb7b465ff2f0d0e5b408fba1ff61b406558e2584015f866b83f08720740
        
# Prepare the kubeconfig file used by kubectl to manage the cluster
mkdir -p $HOME/.kube
cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
chown $(id -u):$(id -g) $HOME/.kube/config
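
At this point kubectl talks to the API server through the VIP; the node will normally report NotReady until the network plugin is installed in the next step:

kubectl cluster-info
kubectl get nodes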

Cluster network preparation

Install Calico

# Download the operator manifest
wget https://projectcalico.docs.tigera.io/archive/v3.23/manifests/tigera-operator.yaml
# Apply the manifest to create the operator
kubectl apply -f tigera-operator.yaml
# Install Calico through its custom resources
wget https://projectcalico.docs.tigera.io/archive/v3.23/manifests/custom-resources.yaml
# Change the CIDR to the pod network range used by kubeadm init (podSubnet / --pod-network-cidr)
sed -i 's/192.168/10.244/' custom-resources.yaml
# Apply the manifest
kubectl apply -f custom-resources.yaml
# Watch the pods in the calico-system namespace until every pod's STATUS is Running
watch kubectl get pods -n calico-system
# Remove the taint from the master node
kubectl taint nodes --all node-role.kubernetes.io/master-

# All Calico pods are now running
kubectl get pods -n calico-system
NAME                                      READY   STATUS    RESTARTS   AGE
calico-kube-controllers-988c95d46-kjrsw   1/1     Running   0          4m43s
calico-node-snm4x                         1/1     Running   0          4m43s
calico-typha-c5ccbbdb8-2sqbn              1/1     Running   0          4m43s

# Check the coredns pods in the kube-system namespace; a Running status means the cluster network is working.
kubectl get pods -n kube-system
NAME                               READY   STATUS    RESTARTS   AGE
coredns-558bd4d5db-9x9wm           1/1     Running   0          10m
coredns-558bd4d5db-vb62w           1/1     Running   0          10m
etcd-master01                      1/1     Running   0          10m
kube-apiserver-master01            1/1     Running   0          10m
kube-controller-manager-master01   1/1     Running   0          10m
kube-proxy-7n758                   1/1     Running   0          10m
kube-scheduler-master01            1/1     Running   0          10m

Install the Calico client (calicoctl)

curl -L https://github.com/projectcalico/calico/releases/download/v3.23.5/calicoctl-linux-amd64 -o calicoctl
mv calicoctl /usr/bin/
chmod +x /usr/bin/calicoctl
# Check the calicoctl version
calicoctl  version
Client Version:    v3.23.5
Git commit:        9e0398360
Cluster Version:   v3.23.5
Cluster Type:      typha,kdd,k8s,operator,bgp,kubeadm

# Connect to the Kubernetes cluster through ~/.kube/config and list the registered nodes
DATASTORE_TYPE=kubernetes KUBECONFIG=~/.kube/config calicoctl get nodes
NAME
master01

Join the remaining nodes to the cluster

Because container image downloads can be slow, errors may appear while nodes join, mostly complaints that the CNI (cluster network plugin) is not ready. As long as the network is reachable, just wait patiently.

# Join the other master nodes
# On master02 and master03
kubeadm join 192.168.91.100:16443 --token 7t2weq.bjbawausm0jaxury \
      --discovery-token-ca-cert-hash sha256:0280fdb7b465ff2f0d0e5b408fba1ff61b406558e2584015f866b83f08720740 \
      --control-plane --certificate-key 5f37642b8eed975cb5fba10bcdc7696890ca00d7952c042b0ecb6d78d7d6989a
# Join the worker nodes
# On worker01 and worker02
kubeadm join 192.168.91.100:16443 --token 7t2weq.bjbawausm0jaxury \
        --discovery-token-ca-cert-hash sha256:0280fdb7b465ff2f0d0e5b408fba1ff61b406558e2584015f866b83f08720740
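
If the join happens later on, note that the bootstrap token above expires after 24 hours and the uploaded certificates are deleted after two hours; fresh values can be generated on master01 (a sketch):

# Print a new worker join command with a fresh token
kubeadm token create --print-join-command
# Re-upload the control-plane certificates and print a new --certificate-key
kubeadm init phase upload-certs --upload-certs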

Verify cluster availability

# master01
# Watch the pods in the calico-system namespace until every pod's STATUS is Running
watch kubectl get pods -n calico-system
# List all nodes
kubectl get nodes
NAME       STATUS   ROLES                  AGE     VERSION
master01   Ready    control-plane,master   23m     v1.21.0
master02   Ready    control-plane,master   5m46s   v1.21.0
master03   Ready    control-plane,master   4m3s    v1.21.0
worker01   Ready    <none>                 4m4s    v1.21.0
worker02   Ready    <none>                 4m2s    v1.21.0

# Check the overall cluster health
# Ideal state
kubectl get cs
NAME                 STATUS    MESSAGE             ERROR
controller-manager   Healthy   ok
scheduler            Healthy   ok
etcd-0               Healthy   {"health":"true"}

# Actual state
kubectl get cs
Warning: v1 ComponentStatus is deprecated in v1.19+
NAME                 STATUS      MESSAGE                                                                                       ERROR
controller-manager   Unhealthy   Get "http://127.0.0.1:10252/healthz": dial tcp 127.0.0.1:10252: connect: connection refused
scheduler            Unhealthy   Get "http://127.0.0.1:10251/healthz": dial tcp 127.0.0.1:10251: connect: connection refused
etcd-0               Healthy     {"health":"true"}
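
The two Unhealthy entries are expected with kubeadm 1.21 defaults: the kube-controller-manager and kube-scheduler static pod manifests set --port=0, which disables the insecure health ports that kubectl get cs probes. If a Healthy status is wanted, the flag can be removed on every master node (a sketch; the kubelet recreates the static pods automatically after the manifests change):

sed -i '/- --port=0/d' /etc/kubernetes/manifests/kube-controller-manager.yaml
sed -i '/- --port=0/d' /etc/kubernetes/manifests/kube-scheduler.yaml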

# Check the pods running in the kube-system namespace
kubectl get pods -n kube-system
NAME                               READY   STATUS    RESTARTS   AGE
coredns-558bd4d5db-9x9wm           1/1     Running   0          24m
coredns-558bd4d5db-vb62w           1/1     Running   0          24m
etcd-master01                      1/1     Running   0          24m
etcd-master02                      1/1     Running   0          3m53s
etcd-master03                      1/1     Running   0          5m4s
kube-apiserver-master01            1/1     Running   0          24m
kube-apiserver-master02            1/1     Running   1          5m1s
kube-apiserver-master03            1/1     Running   0          5m4s
kube-controller-manager-master01   1/1     Running   1          24m
kube-controller-manager-master02   1/1     Running   0          4m10s
kube-controller-manager-master03   1/1     Running   0          5m4s
kube-proxy-7n758                   1/1     Running   0          24m
kube-proxy-7vww6                   1/1     Running   0          5m8s
kube-proxy-cjlhl                   1/1     Running   0          6m52s
kube-proxy-htpj5                   1/1     Running   0          5m10s
kube-proxy-pn6nh                   1/1     Running   0          5m9s
kube-scheduler-master01            1/1     Running   1          24m
kube-scheduler-master02            1/1     Running   0          3m47s
kube-scheduler-master03            1/1     Running   0          5m4s

# Check the pods running in the calico-system namespace
kubectl get pods -n calico-system
NAME                                      READY   STATUS    RESTARTS   AGE
calico-kube-controllers-988c95d46-kjrsw   1/1     Running   1          19m
calico-node-ftxlk                         1/1     Running   0          7m39s
calico-node-hp58g                         1/1     Running   0          5m56s
calico-node-kvjw9                         1/1     Running   0          5m57s
calico-node-snm4x                         1/1     Running   0          19m
calico-node-tchnx                         1/1     Running   0          5m55s
calico-typha-c5ccbbdb8-2sqbn              1/1     Running   0          19m
calico-typha-c5ccbbdb8-v25cl              1/1     Running   0          5m40s
calico-typha-c5ccbbdb8-wh7gx              1/1     Running   0          5m40s

# Check whether the network nodes have been added
DATASTORE_TYPE=kubernetes KUBECONFIG=~/.kube/config calicoctl get nodes
NAME
master01
master02
master03
worker01
worker02