Deploying a Highly Available Kubernetes 1.21 Cluster with kubeadm
Most of the steps are the same as the single-master deployment described above.
Deployment Environment Preparation
Use CentOS 7.9 as the operating system and prepare five nodes with the following specifications:
IP | CPU | Memory | Disk | Role | Hostname |
---|---|---|---|---|---|
192.168.91.181 | 2C | 2G | 40GB | master | master01 |
192.168.91.182 | 2C | 2G | 40GB | master | master02 |
192.168.91.183 | 2C | 2G | 40GB | master | master03 |
192.168.91.184 | 2C | 2G | 40GB | worker(node) | worker01 |
192.168.91.185 | 2C | 2G | 40GB | worker(node) | worker02 |
master01 and master02 are also used to run haproxy and keepalived.
Perform the following operations on all k8s nodes.
# Basic configuration
cat >> /etc/hosts << EOF
192.168.91.181 master01
192.168.91.182 master02
192.168.91.183 master03
192.168.91.184 worker01
192.168.91.185 worker02
EOF
yum -y install ntpdate
echo "0 */1 * * * ntpdate time1.aliyun.com" >> /var/spool/cron/root
systemctl disable firewalld && systemctl stop firewalld
sed -ri 's/SELINUX=enforcing/SELINUX=disabled/' /etc/selinux/config
# Upgrade the OS kernel
rpm --import https://www.elrepo.org/RPM-GPG-KEY-elrepo.org
yum -y install https://www.elrepo.org/elrepo-release-7.el7.elrepo.noarch.rpm
yum --enablerepo="elrepo-kernel" -y install kernel-ml.x86_64
grub2-set-default 0
grub2-mkconfig -o /boot/grub2/grub.cfg
reboot
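# Optionally, confirm after the reboot that the node booted into the newly installed kernel
uname -r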
# Configure IP forwarding and bridge netfilter
cat > /etc/sysctl.d/k8s.conf << EOF
net.bridge.bridge-nf-call-ip6tables = 1
net.bridge.bridge-nf-call-iptables = 1
net.ipv4.ip_forward = 1
vm.swappiness = 0
EOF
modprobe br_netfilter
sysctl -p /etc/sysctl.d/k8s.conf
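# Optionally, confirm br_netfilter is loaded and the forwarding/bridge settings are applied
lsmod | grep br_netfilter
sysctl net.bridge.bridge-nf-call-iptables net.ipv4.ip_forward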
# Install ipset and ipvsadm
yum -y install ipset ipvsadm
cat > /etc/sysconfig/modules/ipvs.modules << EOF
#!/bin/bash
modprobe -- ip_vs
modprobe -- ip_vs_rr
modprobe -- ip_vs_wrr
modprobe -- ip_vs_sh
modprobe -- nf_conntrack
EOF
chmod 755 /etc/sysconfig/modules/ipvs.modules && bash /etc/sysconfig/modules/ipvs.modules
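# Optionally, confirm the ipvs-related modules are loaded
lsmod | grep -e ip_vs -e nf_conntrack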
# Disable the swap partition
sed -i 's&/dev/mapper/centos-swap&#/dev/mapper/centos-swap&' /etc/fstab
swapoff -a
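# Optionally, confirm swap is off (the Swap line should show 0)
free -m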
# Install Docker
wget -O /etc/yum.repos.d/docker-ce.repo https://mirrors.aliyun.com/docker-ce/linux/centos/docker-ce.repo
yum -y install --setopt=obsoletes=0 docker-ce-20.10.9-3.el7
mkdir /etc/docker
cat << EOF > /etc/docker/daemon.json
{
"registry-mirrors": ["https://zwyx2n3v.mirror.aliyuncs.com"],
"exec-opts": ["native.cgroupdriver=systemd"]
}
EOF
systemctl enable docker && systemctl start docker
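# Optionally, confirm Docker uses the systemd cgroup driver, matching the kubelet setting configured later
docker info | grep -i "cgroup driver"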
# Reboot
reboot
Passwordless SSH Between Nodes
Generate the key pair on the master node and copy it to the other nodes; afterwards, verify that the nodes can log in to each other.
# master01
ssh-keygen
cp ~/.ssh/id_rsa.pub ~/.ssh/authorized_keys
# Answer the prompts: type yes and enter the password when asked
for i in 2 3 4 5; do scp -r /root/.ssh root@192.168.91.18$i:/root/; done
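# Optionally, verify passwordless login from master01 to every other node
for i in 2 3 4 5; do ssh root@192.168.91.18$i hostname; done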
Deploying HAProxy and Keepalived
HAProxy
# On master01 and master02
yum -y install haproxy
# Replace the haproxy configuration
mv /etc/haproxy/haproxy.cfg /etc/haproxy/haproxy.cfg.back
cat > /etc/haproxy/haproxy.cfg << "EOF"
global
    maxconn 2000
    ulimit-n 16384
    log 127.0.0.1 local0 err
    stats timeout 30s

defaults
    log global
    mode http
    option httplog
    timeout connect 5000
    timeout client 50000
    timeout server 50000
    timeout http-request 15s
    timeout http-keep-alive 15s

frontend monitor-in
    bind *:33305
    mode http
    option httplog
    monitor-uri /monitor

frontend k8s-master
    bind 0.0.0.0:16443
    bind 127.0.0.1:16443
    mode tcp
    option tcplog
    tcp-request inspect-delay 5s
    default_backend k8s-master

backend k8s-master
    mode tcp
    option tcplog
    option tcp-check
    balance roundrobin
    default-server inter 10s downinter 5s rise 2 fall 2 slowstart 60s maxconn 250 maxqueue 256 weight 100
    server master01 192.168.91.181:6443 check
    server master02 192.168.91.182:6443 check
    server master03 192.168.91.183:6443 check
EOF
systemctl enable haproxy;systemctl start haproxy
curl master01:33305/monitor
curl master02:33305/monitor
<html><body><h1>200 OK</h1>
Service ready.
</body></html>
Keepalived
# On master01 and master02
yum -y install keepalived
# Replace the keepalived configuration
mv /etc/keepalived/keepalived.conf /etc/keepalived/keepalived.conf.back
cat > /etc/keepalived/keepalived.conf << "EOF"
! Configuration File for keepalived
global_defs {
    router_id LVS_DEVEL
    script_user root
    enable_script_security
}
vrrp_script chk_apiserver {
    script "/etc/keepalived/check_apiserver.sh"  # This script must be created separately (see below); it is referenced here
    interval 5
    weight -5
    fall 2
    rise 1
}
vrrp_instance VI_1 {
    state MASTER
    interface ens33               # Change to the network interface actually in use
    mcast_src_ip 192.168.91.181   # The IP address of this master host
    virtual_router_id 51
    priority 101
    advert_int 2
    authentication {
        auth_type PASS
        auth_pass abc123
    }
    virtual_ipaddress {
        192.168.91.100            # The VIP address
    }
    track_script {
        chk_apiserver             # Run the apiserver check script defined above
    }
}
EOF
cat > /etc/keepalived/check_apiserver.sh << "EOF"
#!/bin/bash
# If haproxy is not running after three consecutive checks, stop keepalived so the VIP fails over to another node
err=0
for k in $(seq 1 3)
do
    check_code=$(pgrep haproxy)
    if [[ $check_code == "" ]]; then
        err=$(expr $err + 1)
        sleep 1
        continue
    else
        err=0
        break
    fi
done

if [[ $err != "0" ]]; then
    echo "systemctl stop keepalived"
    /usr/bin/systemctl stop keepalived
    exit 1
else
    exit 0
fi
EOF
chmod +x /etc/keepalived/check_apiserver.sh
# On master02, adjust its copy of the configuration
sed -i 's/192.168.91.181/192.168.91.182/' /etc/keepalived/keepalived.conf
sed -i 's/priority 101/priority 99/' /etc/keepalived/keepalived.conf
# On master01 and master02
systemctl enable keepalived;systemctl start keepalived
# Verify that the HA setup works
# master01
ip a s ens33
2: ens33: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast state UP group default qlen 1000
link/ether 00:0c:29:d7:04:3b brd ff:ff:ff:ff:ff:ff
inet 192.168.91.181/24 brd 192.168.91.255 scope global noprefixroute ens33
valid_lft forever preferred_lft forever
inet 192.168.91.100/32 scope global ens33
valid_lft forever preferred_lft forever
inet6 fe80::8ef0:ab61:8b17:dc27/64 scope link noprefixroute
valid_lft forever preferred_lft forever
ss -anput | grep ":16443"
tcp LISTEN 0 2000 *:16443 *:* users:(("haproxy",pid=2049,fd=5))
tcp LISTEN 0 2000 127.0.0.1:16443 *:* users:(("haproxy",pid=2049,fd=6))
# master02
ss -anput | grep ":16443"
tcp LISTEN 0 2000 *:16443 *:* users:(("haproxy",pid=1963,fd=5))
tcp LISTEN 0 2000 127.0.0.1:16443 *:* users:(("haproxy",pid=1963,fd=6))
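Optionally, a simple failover test: stopping haproxy on master01 should cause the check script to stop keepalived there within a few seconds, moving the VIP to master02. Restart both services afterwards to restore the original state.
# master01
systemctl stop haproxy
# master02: the VIP 192.168.91.100 should now appear on ens33
ip a s ens33
# master01: restore the services
systemctl start haproxy; systemctl start keepalived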
Kubernetes 1.21 Cluster Deployment
 | kubeadm | kubelet | kubectl |
---|---|---|---|
Version | 1.21.0 | 1.21.0 | 1.21.0 |
Installed on | all cluster hosts | all cluster hosts | all cluster hosts |
Purpose | initializes and manages the cluster | receives instructions from the api-server and manages the pod lifecycle | command-line tool for managing cluster applications |
Perform the following on all of the nodes prepared above.
Preparation
# Aliyun YUM repository
cat > /etc/yum.repos.d/kubernetes.repo << EOF
[kubernetes]
name=Kubernetes
baseurl=https://mirrors.aliyun.com/kubernetes/yum/repos/kubernetes-el7-x86_64/
enabled=1
gpgcheck=1
repo_gpgcheck=1
gpgkey=https://mirrors.aliyun.com/kubernetes/yum/doc/yum-key.gpg https://mirrors.aliyun.com/kubernetes/yum/doc/rpm-package-key.gpg
EOF
# Install the specified versions
yum -y install --setopt=obsoletes=0 kubeadm-1.21.0-0 kubelet-1.21.0-0 kubectl-1.21.0-0
# Configure kubelet
sed -ri 's/KUBELET_EXTRA_ARGS=/KUBELET_EXTRA_ARGS="--cgroup-driver=systemd"/' /etc/sysconfig/kubelet
systemctl enable kubelet
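# Optionally, confirm the installed versions
kubeadm version -o short
kubelet --version
kubectl version --client --short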
# Prepare the cluster images
cat > image_download.sh << "EOF"
#!/bin/bash
images_list='
k8s.gcr.io/kube-apiserver:v1.21.0
k8s.gcr.io/kube-controller-manager:v1.21.0
k8s.gcr.io/kube-scheduler:v1.21.0
k8s.gcr.io/kube-proxy:v1.21.0
k8s.gcr.io/pause:3.4.1
k8s.gcr.io/etcd:3.4.13-0
k8s.gcr.io/coredns/coredns:v1.8.0'
for image in $images_list
do
    # Images on k8s.gcr.io cannot be pulled directly from mainland China, so pull from registry.aliyuncs.com/google_containers instead
    image_aliyun=`echo $image | sed 's#k8s.gcr.io#registry.aliyuncs.com/google_containers#'`
    # coredns needs special handling: drop the coredns/ path segment
    image_aliyun=`echo $image_aliyun | sed 's#coredns/##'`
    # Pull the corresponding image from Aliyun
    docker pull $image_aliyun
    # Re-tag it with the original k8s.gcr.io name
    docker tag $image_aliyun $image
    # Remove the Aliyun-tagged image
    docker rmi $image_aliyun
done
EOF
# Run the script to download the images
sh image_download.sh
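# Optionally, confirm all required images are present under the k8s.gcr.io name
docker images | grep k8s.gcr.io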
Cluster Initialization on master01
Prepare the kubeadm-config.yaml configuration file:
apiVersion: kubeadm.k8s.io/v1beta2
bootstrapTokens:
- groups:
  - system:bootstrappers:kubeadm:default-node-token
  token: 7t2weq.bjbawausm0jaxury
  ttl: 24h0m0s
  usages:
  - signing
  - authentication
kind: InitConfiguration
localAPIEndpoint:
  advertiseAddress: 192.168.91.181
  bindPort: 6443
nodeRegistration:
  criSocket: /var/run/dockershim.sock
  name: master01
  taints:
  - effect: NoSchedule
    key: node-role.kubernetes.io/master
---
apiServer:
  certSANs:
  - 192.168.91.100
  timeoutForControlPlane: 4m0s
apiVersion: kubeadm.k8s.io/v1beta2
certificatesDir: /etc/kubernetes/pki
clusterName: kubernetes
controlPlaneEndpoint: 192.168.91.100:16443
controllerManager: {}
dns:
  type: CoreDNS
etcd:
  local:
    dataDir: /var/lib/etcd
imageRepository: k8s.gcr.io
kind: ClusterConfiguration
kubernetesVersion: v1.21.0
networking:
  dnsDomain: cluster.local
  podSubnet: 10.244.0.0/16
  serviceSubnet: 10.96.0.0/12
scheduler: {}
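Optionally, check that kubeadm parses the file as intended by listing the images it would use; they should match the k8s.gcr.io images pulled earlier.
kubeadm config images list --config /root/kubeadm-config.yaml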
# master01
# Keep the full output; it is needed in later steps
kubeadm init --config /root/kubeadm-config.yaml --upload-certs
Your Kubernetes control-plane has initialized successfully!
To start using your cluster, you need to run the following as a regular user:
mkdir -p $HOME/.kube
sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
sudo chown $(id -u):$(id -g) $HOME/.kube/config
Alternatively, if you are the root user, you can run:
export KUBECONFIG=/etc/kubernetes/admin.conf
You should now deploy a pod network to the cluster.
Run "kubectl apply -f [podnetwork].yaml" with one of the options listed at:
https://kubernetes.io/docs/concepts/cluster-administration/addons/
You can now join any number of the control-plane node running the following command on each as root:
kubeadm join 192.168.91.100:16443 --token 7t2weq.bjbawausm0jaxury \
--discovery-token-ca-cert-hash sha256:0280fdb7b465ff2f0d0e5b408fba1ff61b406558e2584015f866b83f08720740 \
--control-plane --certificate-key 5f37642b8eed975cb5fba10bcdc7696890ca00d7952c042b0ecb6d78d7d6989a
Please note that the certificate-key gives access to cluster sensitive data, keep it secret!
As a safeguard, uploaded-certs will be deleted in two hours; If necessary, you can use
"kubeadm init phase upload-certs --upload-certs" to reload certs afterward.
Then you can join any number of worker nodes by running the following on each as root:
kubeadm join 192.168.91.100:16443 --token 7t2weq.bjbawausm0jaxury \
--discovery-token-ca-cert-hash sha256:0280fdb7b465ff2f0d0e5b408fba1ff61b406558e2584015f866b83f08720740
# Prepare the kubeconfig so kubectl can manage the cluster
mkdir -p $HOME/.kube
cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
chown $(id -u):$(id -g) $HOME/.kube/config
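At this point kubectl can already reach the api-server through the VIP; the node will usually report NotReady until the network plugin is installed in the next step.
kubectl get nodes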
Cluster Network Setup
Installing calico
# Download the operator manifest
wget https://projectcalico.docs.tigera.io/archive/v3.23/manifests/tigera-operator.yaml
# Apply the manifest to create the operator
kubectl apply -f tigera-operator.yaml
# Install via the custom resources
wget https://projectcalico.docs.tigera.io/archive/v3.23/manifests/custom-resources.yaml
# Change the CIDR to the pod network range set in kubeadm-config.yaml (podSubnet: 10.244.0.0/16)
sed -i 's/192.168/10.244/' custom-resources.yaml
# Apply the manifest
kubectl apply -f custom-resources.yaml
# Watch the pods in the calico-system namespace until every pod's STATUS becomes Running
watch kubectl get pods -n calico-system
# Remove the master taint so workloads can also be scheduled on the master nodes
kubectl taint nodes --all node-role.kubernetes.io/master-
# All pods are now running
kubectl get pods -n calico-system
NAME READY STATUS RESTARTS AGE
calico-kube-controllers-988c95d46-kjrsw 1/1 Running 0 4m43s
calico-node-snm4x 1/1 Running 0 4m43s
calico-typha-c5ccbbdb8-2sqbn 1/1 Running 0 4m43s
# Check the coredns pods in the kube-system namespace; Running status indicates the cluster network is working
kubectl get pods -n kube-system
NAME READY STATUS RESTARTS AGE
coredns-558bd4d5db-9x9wm 1/1 Running 0 10m
coredns-558bd4d5db-vb62w 1/1 Running 0 10m
etcd-master01 1/1 Running 0 10m
kube-apiserver-master01 1/1 Running 0 10m
kube-controller-manager-master01 1/1 Running 0 10m
kube-proxy-7n758 1/1 Running 0 10m
kube-scheduler-master01 1/1 Running 0 10m
Installing the calico client
curl -L https://github.com/projectcalico/calico/releases/download/v3.23.5/calicoctl-linux-amd64 -o calicoctl
mv calicoctl /usr/bin/
chmod +x /usr/bin/calicoctl
# Check the calicoctl version
calicoctl version
Client Version: v3.23.5
Git commit: 9e0398360
Cluster Version: v3.23.5
Cluster Type: typha,kdd,k8s,operator,bgp,kubeadm
# Connect to the Kubernetes cluster via ~/.kube/config and list the registered nodes
DATASTORE_TYPE=kubernetes KUBECONFIG=~/.kube/config calicoctl get nodes
NAME
master01
Joining the Remaining Nodes to the Cluster
Because the container images can be slow to download, errors may appear, typically that the cni (cluster network plugin) is not ready. As long as the network is reachable, simply wait for the images to finish downloading.
# Join the other master nodes to the cluster
# On master02 and master03
kubeadm join 192.168.91.100:16443 --token 7t2weq.bjbawausm0jaxury \
--discovery-token-ca-cert-hash sha256:0280fdb7b465ff2f0d0e5b408fba1ff61b406558e2584015f866b83f08720740 \
--control-plane --certificate-key 5f37642b8eed975cb5fba10bcdc7696890ca00d7952c042b0ecb6d78d7d6989a
# Join the worker nodes to the cluster
# On worker01 and worker02
kubeadm join 192.168.91.100:16443 --token 7t2weq.bjbawausm0jaxury \
--discovery-token-ca-cert-hash sha256:0280fdb7b465ff2f0d0e5b408fba1ff61b406558e2584015f866b83f08720740
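If a node is joined after the token (24h TTL) or the uploaded certificates (2h) have expired, new credentials can be generated on master01:
# Print a fresh worker join command with a new token
kubeadm token create --print-join-command
# Re-upload the control-plane certificates and print a new certificate key (append it to the join command via --control-plane --certificate-key)
kubeadm init phase upload-certs --upload-certs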
Verifying Cluster Availability
# master01
# Watch the pods in the calico-system namespace until every pod's STATUS becomes Running
watch kubectl get pods -n calico-system
# View all nodes
kubectl get nodes
NAME STATUS ROLES AGE VERSION
master01 Ready control-plane,master 23m v1.21.0
master02 Ready control-plane,master 5m46s v1.21.0
master03 Ready control-plane,master 4m3s v1.21.0
worker01 Ready <none> 4m4s v1.21.0
worker02 Ready <none> 4m2s v1.21.0
# Check the overall health of the cluster components
# Ideal state
kubectl get cs
NAME STATUS MESSAGE ERROR
controller-manager Healthy ok
scheduler Healthy ok
etcd-0 Healthy {"health":"true"}
# Actual output: the Unhealthy entries are expected, because kube-scheduler and kube-controller-manager in 1.21 run with --port=0, which disables the insecure health ports that kubectl get cs probes; the components themselves are healthy
kubectl get cs
Warning: v1 ComponentStatus is deprecated in v1.19+
NAME STATUS MESSAGE ERROR
controller-manager Unhealthy Get "http://127.0.0.1:10252/healthz": dial tcp 127.0.0.1:10252: connect: connection refused
scheduler Unhealthy Get "http://127.0.0.1:10251/healthz": dial tcp 127.0.0.1:10251: connect: connection refused
etcd-0 Healthy {"health":"true"}
# Check the pods in the kube-system namespace
kubectl get pods -n kube-system
NAME READY STATUS RESTARTS AGE
coredns-558bd4d5db-9x9wm 1/1 Running 0 24m
coredns-558bd4d5db-vb62w 1/1 Running 0 24m
etcd-master01 1/1 Running 0 24m
etcd-master02 1/1 Running 0 3m53s
etcd-master03 1/1 Running 0 5m4s
kube-apiserver-master01 1/1 Running 0 24m
kube-apiserver-master02 1/1 Running 1 5m1s
kube-apiserver-master03 1/1 Running 0 5m4s
kube-controller-manager-master01 1/1 Running 1 24m
kube-controller-manager-master02 1/1 Running 0 4m10s
kube-controller-manager-master03 1/1 Running 0 5m4s
kube-proxy-7n758 1/1 Running 0 24m
kube-proxy-7vww6 1/1 Running 0 5m8s
kube-proxy-cjlhl 1/1 Running 0 6m52s
kube-proxy-htpj5 1/1 Running 0 5m10s
kube-proxy-pn6nh 1/1 Running 0 5m9s
kube-scheduler-master01 1/1 Running 1 24m
kube-scheduler-master02 1/1 Running 0 3m47s
kube-scheduler-master03 1/1 Running 0 5m4s
# Check the pods in the calico-system namespace
kubectl get pods -n calico-system
NAME READY STATUS RESTARTS AGE
calico-kube-controllers-988c95d46-kjrsw 1/1 Running 1 19m
calico-node-ftxlk 1/1 Running 0 7m39s
calico-node-hp58g 1/1 Running 0 5m56s
calico-node-kvjw9 1/1 Running 0 5m57s
calico-node-snm4x 1/1 Running 0 19m
calico-node-tchnx 1/1 Running 0 5m55s
calico-typha-c5ccbbdb8-2sqbn 1/1 Running 0 19m
calico-typha-c5ccbbdb8-v25cl 1/1 Running 0 5m40s
calico-typha-c5ccbbdb8-wh7gx 1/1 Running 0 5m40s
# Verify that all nodes have been added to the calico network
DATASTORE_TYPE=kubernetes KUBECONFIG=~/.kube/config calicoctl get nodes
NAME
master01
master02
master03
worker01
worker02
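As a final optional smoke test, a small nginx deployment (the name nginx-test is arbitrary) can be created and reached through a NodePort:
# Create a test deployment and expose it via a NodePort service
kubectl create deployment nginx-test --image=nginx:1.21 --replicas=2
kubectl expose deployment nginx-test --port=80 --type=NodePort
# Check where the pods landed and which NodePort was assigned
kubectl get pods -o wide -l app=nginx-test
kubectl get svc nginx-test
# Access the service through any node IP and the assigned NodePort, then clean up
kubectl delete svc,deployment nginx-test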