环境
软件版本
- 操作系统:Rocky Linux 9
- kubernetes版本:1.32.1
- 运行时容器:cri-docker
- 网络插件:calico
操作系统分配
| 主机 | 配置 | IP |
|---|---|---|
| K8s-master | 2核4G | 192.168.31.55 |
| K8s-node01 | 2核4G | 192.168.31.86 |
| K8s-node02 | 2核4G | 192.168.31.195 |
操作系统初始化
更换系统软件源
# Point every Rocky repo file at the Aliyun mirror, keeping a .bak backup of each file.
# Note: '$contentdir' is a dnf repo variable, so the single quotes must stay literal here.
sed -e 's|^mirrorlist=|#mirrorlist=|g' \
-e 's|^#baseurl=http://dl.rockylinux.org/$contentdir|baseurl=https://mirrors.aliyun.com/rockylinux|g' \
-i.bak \
/etc/yum.repos.d/[Rr]ocky*.repo
# Refresh the dnf metadata cache
dnf makecache
修改主机名字
- 所有节点操作
# Set this node's hostname (replace 主机名 with k8s-master / k8s-node01 / k8s-node02)
hostnamectl set-hostname 主机名
# Reboot so all services pick up the new hostname
reboot
关闭防火墙
- 所有主机操作
# Stop firewalld now and disable it at boot (kube-proxy/calico manage their own rules)
systemctl disable --now firewalld.service
# Verify it is inactive
systemctl status firewalld.service
关闭SELinux
- 所有主机操作
# Permanently disable SELinux (takes effect after the next reboot)
sed -ri 's#(SELINUX=)enforcing#\1disabled#g' /etc/selinux/config
# Switch to permissive mode for the current boot
setenforce 0
# Verify: should print Permissive now (Disabled after a reboot)
getenforce
配置hosts
- 所有主机操作
# Edit the hosts file (fix: the original command was "vim cat /etc/hosts" — the
# stray "cat" would open a bogus file named "cat" alongside /etc/hosts)
vim /etc/hosts
# Add the following entries
192.168.31.55 k8s-master
192.168.31.86 k8s-node01
192.168.31.195 k8s-node02
配置免密登录
- 任意节点执行
- 可选操作
# Install sshpass for non-interactive password authentication
dnf install sshpass -y
# Generate the key pair quietly. Use an absolute path and create ~/.ssh first:
# the original relative "-f .ssh/id_rsa" fails when the directory does not
# exist or the current directory is not $HOME.
mkdir -p "$HOME/.ssh" && chmod 700 "$HOME/.ssh"
ssh-keygen -P '' -q -t rsa -f "$HOME/.ssh/id_rsa"
# Root password shared by all nodes
Password=YOURPASSWORD
# Copy the public key to every k8s host listed in /etc/hosts.
# Print only column 1 (the IP): the original '{print $0}' emitted the IP *and*
# the hostname of each line, so ssh-copy-id ran twice per host.
for ip in $(awk '/k8s/{print $1}' /etc/hosts); do
  sshpass -p "$Password" ssh-copy-id -o StrictHostKeyChecking=no "root@$ip"
done
# Replicate the whole key set to the worker nodes
scp -r "$HOME"/.ssh/* root@192.168.31.86:"$HOME/.ssh/"
scp -r "$HOME"/.ssh/* root@192.168.31.195:"$HOME/.ssh/"
配置时间同步
- 所有节点操作
# Install the chrony time-sync daemon
dnf install chrony -y
# Edit the config file and add the pool entries below
vim /etc/chrony.conf
pool ntp1.aliyun.com iburst
pool ntp2.aliyun.com iburst
pool cn.pool.ntp.org iburst
# Enable and start at boot
systemctl enable --now chronyd
# Verify the configured time sources are reachable
chronyc sources
禁用swap分区
- 所有节点操作
# Turn swap off for the current boot
swapoff -a
# Comment out every swap entry in fstab so it stays off after reboot
sed -i 's/.*swap.*/#&/' /etc/fstab
修改内核参数
- 所有节点操作
# Kubernetes sysctl settings.
# Fix: use '>' instead of the original '>>' so re-running this step does not
# append duplicate entries to /etc/sysctl.d/k8s.conf.
cat > /etc/sysctl.d/k8s.conf << EOF
#内核参数调整
vm.swappiness=0
#配置iptables参数,使得流经网桥的流量也经过iptables/netfilter防火墙
net.bridge.bridge-nf-call-ip6tables = 1
net.bridge.bridge-nf-call-iptables = 1
net.ipv4.ip_forward = 1
EOF
# Load overlay and br_netfilter automatically at every boot
cat <<EOF | tee /etc/modules-load.d/k8s.conf
overlay
br_netfilter
EOF
# Load them immediately for the current boot
modprobe overlay
modprobe br_netfilter
# Apply all sysctl settings (br_netfilter must already be loaded,
# otherwise the net.bridge.* keys do not exist)
sysctl --system
# Check that the module is loaded
lsmod | grep br_netfilter
# 返回如下内容表示成功
[root@k8s-master ~]# lsmod | grep br_netfilter
br_netfilter 32768 0
bridge 303104 1 br_netfilter
配置ipvs功能
- 所有节点操作
# Install ipset and ipvsadm (user-space tooling for IPVS)
dnf install ipset ipvsadm -y
# Persist the kernel modules kube-proxy needs in IPVS mode
cat <<EOF | sudo tee /etc/modules-load.d/ipvs.conf
overlay
ip_vs
ip_vs_rr
ip_vs_wrr
ip_vs_sh
nf_conntrack
EOF
# Load them immediately for the current boot
modprobe overlay
modprobe ip_vs && modprobe ip_vs_rr && modprobe ip_vs_wrr && modprobe ip_vs_sh && modprobe nf_conntrack
#查看模块是否加载成功
[root@k8s-master ~]# lsmod | grep -e ip_vs -e nf_conntrack_ipv4
ip_vs_sh 12288 0
ip_vs_wrr 12288 0
ip_vs_rr 12288 6
ip_vs 184320 12 ip_vs_rr,ip_vs_sh,ip_vs_wrr
nf_conntrack 200704 7 xt_conntrack,nf_nat,xt_nat,nf_conntrack_netlink,xt_CT,xt_MASQUERADE,ip_vs
nf_defrag_ipv6 24576 2 nf_conntrack,ip_vs
libcrc32c 12288 5 nf_conntrack,nf_nat,nf_tables,xfs,ip_vs
安装Docker
- 所有节点操作
配置仓库源
# Add the Aliyun Docker CE repository. Fix: fetch the repo file over https
# (the original used plain http, leaving the repo definition open to tampering
# in transit).
dnf config-manager --add-repo https://mirrors.aliyun.com/docker-ce/linux/centos/docker-ce.repo
# Rewrite any remaining download.docker.com URLs inside the repo file
sed -i 's+download.docker.com+mirrors.aliyun.com/docker-ce+' /etc/yum.repos.d/docker-ce.repo
# Refresh the dnf metadata cache
dnf makecache
安装docker-ce
# Install the latest docker-ce from the repo configured above
dnf install docker-ce -y
修改配置文件
# Write the Docker daemon config: registry mirrors, bounded json-file logging,
# and the systemd cgroup driver (required so kubelet and Docker agree).
# NOTE(review): several of these public mirrors (azk8s.cn, ustc, 163) have been
# discontinued at various times — verify they still resolve before relying on them.
cat > /etc/docker/daemon.json <<EOF
{
"registry-mirrors": [
"https://dockerhub.azk8s.cn",
"https://docker.mirrors.ustc.edu.cn",
"http://hub-mirror.c.163.com",
"https://mirror.ccs.tencentyun.com",
"https://nfvzt07v.mirror.aliyuncs.com",
"https://ba301968e4af4e539e8a64abc60c9ff5.mirror.swr.myhuaweicloud.com"
],
"max-concurrent-downloads": 10,
"log-driver": "json-file",
"log-level": "warn",
"log-opts": {
"max-size": "10m",
"max-file": "3"
},
"data-root": "/var/lib/docker",
"exec-opts": ["native.cgroupdriver=systemd"]
}
EOF
重启docker服务
# Reload units, restart Docker with the new config, and enable it at boot
systemctl daemon-reload && systemctl restart docker && systemctl enable docker
安装运行时环境
- 所有节点操作
下载cri-docker
# Download the package matching your version and platform (amd64/arm64) from:
https://github.com/Mirantis/cri-dockerd/releases
# Unpack it.
# NOTE(review): the file name below is the arm64 build — these hosts are
# presumably x86, where cri-dockerd-0.3.16.amd64.tgz is the right tarball; confirm.
tar -xf cri-dockerd-0.3.16.arm64.tgz
# Install the binary and make it executable
cp cri-dockerd/cri-dockerd /usr/bin/
chmod +x /usr/bin/cri-dockerd
配置cri-docker服务
# systemd unit for cri-dockerd, which bridges Docker to the Kubernetes CRI.
# The pause image is pinned to the Aliyun mirror to avoid pulling from registry.k8s.io.
# The quoted "EOF" delimiter keeps $MAINPID literal for systemd to expand.
cat <<"EOF" > /usr/lib/systemd/system/cri-docker.service
[Unit]
Description=CRI Interface for Docker Application Container Engine
Documentation=https://docs.mirantis.com
After=network-online.target firewalld.service docker.service
Wants=network-online.target
Requires=cri-docker.socket
[Service]
Type=notify
ExecStart=/usr/bin/cri-dockerd --network-plugin=cni --pod-infra-container-image=registry.aliyuncs.com/google_containers/pause:3.10
ExecReload=/bin/kill -s HUP $MAINPID
TimeoutSec=0
RestartSec=2
Restart=always
StartLimitBurst=3
StartLimitInterval=60s
LimitNOFILE=infinity
LimitNPROC=infinity
LimitCORE=infinity
TasksMax=infinity
Delegate=yes
KillMode=process
[Install]
WantedBy=multi-user.target
EOF
添加cri-docker套接字
# Socket unit: cri-dockerd is socket-activated on %t/cri-dockerd.sock
# (i.e. /run/cri-dockerd.sock), owned by root:docker with mode 0660.
cat <<"EOF" > /usr/lib/systemd/system/cri-docker.socket
[Unit]
Description=CRI Docker Socket for the API
PartOf=cri-docker.service
[Socket]
ListenStream=%t/cri-dockerd.sock
SocketMode=0660
SocketUser=root
SocketGroup=docker
[Install]
WantedBy=sockets.target
EOF
启动cri-docker对应服务
# Pick up the new unit files
systemctl daemon-reload
# Enable start at boot
systemctl enable cri-docker
# Start the service now
systemctl start cri-docker
# Check status
systemctl is-active cri-docker # "active" means it started correctly
# If startup failed, inspect the logs with:
journalctl -u cri-docker
安装k8s集群
配置仓库源
- 所有节点操作
- 阿里源配置参考地址:https://developer.aliyun.com/mirror/kubernetes?spm=a2c6h.13651102.0.0.362f1b11DDkjO1
# Configure the Aliyun mirror for Kubernetes v1.32 packages (gpgcheck enabled)
cat <<EOF | tee /etc/yum.repos.d/kubernetes.repo
[kubernetes]
name=Kubernetes
baseurl=https://mirrors.aliyun.com/kubernetes-new/core/stable/v1.32/rpm/
enabled=1
gpgcheck=1
gpgkey=https://mirrors.aliyun.com/kubernetes-new/core/stable/v1.32/rpm/repodata/repomd.xml.key
EOF
安装k8s
- 所有节点操作
# Install kubelet, kubeadm and kubectl
dnf install -y kubelet kubeadm kubectl
# Enable kubelet at boot (it will crash-loop until kubeadm init/join runs — expected)
systemctl enable --now kubelet
# List the images kubeadm will pull
kubeadm config images list --image-repository=registry.aliyuncs.com/google_containers
# Pre-pull them; --cri-socket selects the cri-dockerd runtime endpoint
kubeadm config images pull --image-repository=registry.aliyuncs.com/google_containers --cri-socket unix:///var/run/cri-dockerd.sock
初始化k8s集群
master节点操作
# Initialize the control plane.
# --pod-network-cidr must match the cidr set later in calico's custom-resources.yaml;
# --cri-socket points kubeadm at cri-dockerd instead of the default containerd socket.
kubeadm init --kubernetes-version=1.32.1 \
--apiserver-advertise-address=192.168.31.55 \
--image-repository registry.aliyuncs.com/google_containers \
--service-cidr=10.96.0.0/12 \
--pod-network-cidr=10.244.0.0/16 \
--ignore-preflight-errors=Swap \
--cri-socket=unix:///var/run/cri-dockerd.sock
# 初始化成功会显示如下信息
Your Kubernetes control-plane has initialized successfully!
To start using your cluster, you need to run the following as a regular user:
mkdir -p $HOME/.kube
sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
sudo chown $(id -u):$(id -g) $HOME/.kube/config
Alternatively, if you are the root user, you can run:
export KUBECONFIG=/etc/kubernetes/admin.conf
You should now deploy a pod network to the cluster.
Run "kubectl apply -f [podnetwork].yaml" with one of the options listed at:
https://kubernetes.io/docs/concepts/cluster-administration/addons/
Then you can join any number of worker nodes by running the following on each as root:
kubeadm join 192.168.31.55:6443 --token xxxxx \
--discovery-token-ca-cert-hash sha256:xxxx
- 集群初始化可选配置参数
| 参数 | 说明 |
|---|---|
--apiserver-advertise-address | 指定API Server地址 |
--apiserver-bind-port | 指定绑定的API Server端口,默认值为6443 |
--ignore-preflight-errors | 忽视检查项错误列表,例如IsPrivilegedUser,Swap,如填写为 all 则将忽视所有的检查项错误 |
--kubernetes-version | 指定Kubernetes版本 |
--pod-network-cidr | 指定pod网络IP地址段 |
--service-cidr | 指定service的IP地址段 |
--service-dns-domain | 指定Service的域名,默认为cluster.local |
--token | 指定token |
--token-ttl | 指定token有效时间,如果设置为0,则永不过期 |
--image-repository | 指定镜像仓库地址,默认为k8s.gcr.io |
设置k8s配置文件
master节点操作
# Install the admin kubeconfig for the current user
mkdir -p $HOME/.kube
sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
sudo chown $(id -u):$(id -g) $HOME/.kube/config
# Copy it to the workers so kubectl works there too.
# NOTE(review): this ships cluster-admin credentials to every node — fine for a
# lab, avoid in production.
scp -r $HOME/.kube k8s-node01:$HOME/
scp -r $HOME/.kube k8s-node02:$HOME/
# Check control-plane component health
kubectl get componentstatuses
# 输出如下信息
NAME STATUS MESSAGE ERROR
scheduler Healthy ok
controller-manager Healthy ok
etcd-0 Healthy ok
从节点加入集群
- 所有从节点执行
# With cri-dockerd, kubeadm join must include --cri-socket=unix:///var/run/cri-dockerd.sock
kubeadm join 192.168.31.55:6443 --token u43btt.zeer6r3fwlg04df1 \
--discovery-token-ca-cert-hash sha256:f0f3e18955e51d8a251c31b20b301d95fea1503b8e7b12e109d6d844f7db29f9 --cri-socket unix:///var/run/cri-dockerd.sock
- 主节点查看状态
kubectl get nodes
# 显示如下
[root@k8s-master ~]# kubectl get nodes
NAME STATUS ROLES AGE VERSION
k8s-master NotReady control-plane 5h33m v1.32.1
k8s-node01 NotReady <none> 5h29m v1.32.1
k8s-node02 NotReady <none> 5h29m v1.32.1
# 当前集群是NotReady状态,还需要配置网络后才可用
设置k8s网络
master节点操作
- 使用calico插件配置网络
安装Tigera Calico
- 官方网址:https://archive-os-3-26.netlify.app/calico/3.26/getting-started/kubernetes/quickstart/
- 打开文档中给定的最新版本地址,下载tigera-operator.yaml文件
- 打开文档中给定的最新版本地址,下载custom-resources.yaml文件
# For example (v3.26.5 manifests):
wget https://raw.githubusercontent.com/projectcalico/calico/v3.26.5/manifests/tigera-operator.yaml
wget https://raw.githubusercontent.com/projectcalico/calico/v3.26.5/manifests/custom-resources.yaml
- 安装tigera calico
# Create the Tigera operator resources
kubectl create -f tigera-operator.yaml
# Verify the pod in the tigera-operator namespace was created
kubectl get pods -n tigera-operator
# 输出如下内容表示创建成功
[root@k8s-master ~]# kubectl get pods -n tigera-operator
NAME READY STATUS RESTARTS AGE
tigera-operator-7d68577dc5-b99xh 1/1 Running 0 5h39m
安装calico
- 修改custom-resources.yaml配置文件
# Edit the calico installation manifest so its pod CIDR matches the cluster
vim custom-resources.yaml
# cidr must equal the --pod-network-cidr=10.244.0.0/16 used at kubeadm init time
cidr: 192.168.0.0/16
修改为
cidr: 10.244.0.0/16
- 创建资源
kubectl apply -f custom-resources.yaml
- 验证
# 查看是否创建calico-system命名空间
kubectl get ns
# 输出如下内容表示创建成功
[root@k8s-master ~]# kubectl get ns
NAME STATUS AGE
calico-apiserver Active 5h44m
calico-system Active 5h44m
default Active 5h50m
kube-node-lease Active 5h50m
kube-public Active 5h50m
kube-system Active 5h50m
tigera-operator Active 5h45m
# 查看pod是否创建完成
kubectl get pod -n calico-system
# 输出如下内容表示创建完成
[root@k8s-master ~]# kubectl get pod -n calico-system
NAME READY STATUS RESTARTS AGE
calico-kube-controllers-7b9949b59d-krp9n 1/1 Running 0 5h45m
calico-node-cd4wv 1/1 Running 0 5h45m
calico-node-lss9l 1/1 Running 0 5h45m
calico-node-ltxqf 1/1 Running 0 5h45m
calico-typha-d5b6c59d8-84cqn 1/1 Running 0 5h45m
calico-typha-d5b6c59d8-w6xbw 1/1 Running 0 5h45m
csi-node-driver-hk2jf 2/2 Running 0 5h45m
csi-node-driver-hkwkf 2/2 Running 0 5h45m
csi-node-driver-jr94w 2/2 Running 0 5h45m
# 查看集群状态是否为ready
kubectl get nodes
# 输出如下内容即为成功
[root@k8s-master ~]# kubectl get nodes
NAME STATUS ROLES AGE VERSION
k8s-master Ready control-plane 5h53m v1.32.1
k8s-node01 Ready <none> 5h50m v1.32.1
k8s-node02 Ready <none> 5h49m v1.32.1
- 可能出现的问题
- 未创建calico-system命名空间:可能是cidr网段修改不正确
- calico-node的READY列是0/1:可以使用kubectl describe pod pod名称 -n calico-system查看具体原因
- 如果镜像下载失败:可以手动到https://github.com/projectcalico/calico/releases/tag/v3.29.1下载release-v3.29.1.tgz文件,解压后进入image镜像文件夹,通过docker load -i 文件名导入镜像
配置k8s使用ipvs
master节点操作
# Edit the kube-proxy ConfigMap and change mode: "" to mode: "ipvs"
kubectl edit configmaps kube-proxy -n kube-system
49 kind: KubeProxyConfiguration
50 logging:
51 flushFrequency: 0
52 options:
53 json:
54 infoBufferSize: "0"
55 text:
56 infoBufferSize: "0"
57 verbosity: 0
58 metricsBindAddress: ""
59 mode: "ipvs" # 将mode: ""修改为mode: "ipvs"
60 nftables:
61 masqueradeAll: false
62 masqueradeBit: null
63 minSyncPeriod: 0s
64 syncPeriod: 0s
65 nodePortAddresses: null
66 oomScoreAdj: null
67 portRange: ""
68 showHiddenMetricsForVersion: ""
69 winkernel:
70 enableDSR: false
71 forwardHealthCheckVip: false
72 networkName: ""
73 rootHnsEndpointName: ""
74 sourceVip: ""
# Delete every kube-proxy pod; the DaemonSet recreates them with the new mode
kubectl delete pod -l k8s-app=kube-proxy -n kube-system
# Verify IPVS virtual servers now exist
ipvsadm -ln
# 输出如下内容
[root@k8s-master ~]# ipvsadm -ln
IP Virtual Server version 1.2.1 (size=4096)
Prot LocalAddress:Port Scheduler Flags
-> RemoteAddress:Port Forward Weight ActiveConn InActConn
TCP 10.96.0.1:443 rr
-> 192.168.31.55:6443 Masq 1 0 0
TCP 10.96.0.10:53 rr
-> 10.244.85.193:53 Masq 1 0 0
-> 10.244.85.195:53 Masq 1 0 0
TCP 10.96.0.10:9153 rr
-> 10.244.85.193:9153 Masq 1 0 0
-> 10.244.85.195:9153 Masq 1 0 0
TCP 10.104.117.81:5473 rr
-> 192.168.31.86:5473 Masq 1 0 0
-> 192.168.31.195:5473 Masq 1 0 0
TCP 10.106.176.144:443 rr
-> 10.244.85.194:5443 Masq 1 0 0
-> 10.244.85.196:5443 Masq 1 0 0
UDP 10.96.0.10:53 rr
-> 10.244.85.193:53 Masq 1 0 0
-> 10.244.85.195:53 Masq 1 0 0