K8S集群安装

1,520 阅读7分钟

环境

软件版本

  • 操作系统:Rocky Linux 9
  • kubernetes版本:1.32.1
  • 运行时容器:cri-docker
  • 网络插件:calico

操作系统分配

主机 | 配置 | IP
K8s-master | 2核4G | 192.168.31.55
K8s-node01 | 2核4G | 192.168.31.86
K8s-node02 | 2核4G | 192.168.31.195

操作系统初始化

更换系统软件源

# Switch the Rocky Linux dnf repos from the official mirrorlist to the Aliyun
# mirror; -i.bak edits the repo files in place and keeps a .bak backup of each.
sed -e 's|^mirrorlist=|#mirrorlist=|g' \
    -e 's|^#baseurl=http://dl.rockylinux.org/$contentdir|baseurl=https://mirrors.aliyun.com/rockylinux|g' \
    -i.bak \
    /etc/yum.repos.d/[Rr]ocky*.repo
    
# Refresh the dnf metadata cache
dnf makecache

修改主机名字

  • 所有节点操作
# Set the hostname (replace 主机名 with k8s-master / k8s-node01 / k8s-node02)
hostnamectl set-hostname 主机名
# Reboot so all services pick up the new hostname
reboot

关闭防火墙

  • 所有主机操作
# Stop firewalld now and disable it at boot (kubeadm needs its ports reachable)
systemctl disable --now firewalld.service
# Verify it is inactive
systemctl status firewalld.service

关闭SELinux

  • 所有主机操作
# Permanently disable SELinux (takes effect after the next reboot)
sed -ri 's#(SELINUX=)enforcing#\1disabled#g' /etc/selinux/config
# Switch to permissive mode for the current boot
setenforce 0
# Verify: should print Permissive now, Disabled after reboot
getenforce

配置hosts

  • 所有主机操作
# Append the cluster host entries to /etc/hosts on every node.
# (The original "vim cat /etc/hosts" was a typo — "vim" and "cat" were
# fused into one invalid command; a heredoc append is scriptable.)
cat >> /etc/hosts << 'EOF'
192.168.31.55 k8s-master
192.168.31.86 k8s-node01
192.168.31.195 k8s-node02
EOF

配置免密登录

  • 任意节点执行
  • 可选操作
# Install sshpass for non-interactive password authentication
dnf install sshpass -y
# Silently generate an RSA key pair with no passphrase.
# (Use an absolute path — the original relative ".ssh/id_rsa" only works
# when run from $HOME.)
ssh-keygen -P '' -q -t rsa -f "$HOME/.ssh/id_rsa"
# Root password of the target nodes
Password=YOURPASSWORD
# Push the public key to every k8s host listed in /etc/hosts.
# awk prints only the IP column ($1); the original printed the whole line
# ($0), so word-splitting made the loop contact each host twice (IP and name).
for i in $(awk '/k8s/{print $1}' /etc/hosts); do
  sshpass -p "$Password" ssh-copy-id -o StrictHostKeyChecking=no "root@$i"
done
# Copy the full key pair to the other nodes so any node can ssh to any other
scp -r "$HOME"/.ssh/* root@192.168.31.86:"$HOME"/.ssh/
scp -r "$HOME"/.ssh/* root@192.168.31.195:"$HOME"/.ssh/

配置时间同步

  • 所有节点操作
# Install chrony for time synchronization
dnf install chrony -y
# Append the NTP pool servers to the chrony configuration.
# (The original showed "vim /etc/chrony.conf" followed by bare "pool ..."
# lines, which are not executable shell; a heredoc keeps this scriptable.)
cat >> /etc/chrony.conf << 'EOF'
pool ntp1.aliyun.com iburst
pool ntp2.aliyun.com iburst
pool cn.pool.ntp.org iburst
EOF

# Enable at boot and start immediately
systemctl enable --now chronyd

# Verify the time sources are reachable
chronyc sources

禁用swap分区

  • 所有节点操作
# Turn swap off for the current boot (kubelet refuses to run with swap enabled)
swapoff -a
# Comment out every swap entry in /etc/fstab so it stays off after reboot
sed -i 's/.*swap.*/#&/' /etc/fstab

修改内核参数

  • 所有节点操作
# Write the kubernetes sysctl settings. Use ">" (overwrite) instead of the
# original ">>" — appending duplicates every entry each time this is re-run.
cat > /etc/sysctl.d/k8s.conf << EOF
#内核参数调整
vm.swappiness=0 
#配置iptables参数,使得流经网桥的流量也经过iptables/netfilter防火墙
net.bridge.bridge-nf-call-ip6tables = 1
net.bridge.bridge-nf-call-iptables = 1
net.ipv4.ip_forward = 1
EOF

# Persist the bridge-filtering kernel modules so they load at every boot
cat <<EOF | tee /etc/modules-load.d/k8s.conf
overlay
br_netfilter
EOF

modprobe overlay
modprobe br_netfilter

# Apply all sysctl configuration files now
sysctl --system
# Verify the module is loaded
lsmod | grep br_netfilter
# 返回如下内容表示成功
[root@k8s-master ~]# lsmod | grep br_netfilter
br_netfilter           32768  0
bridge                303104  1 br_netfilter

配置ipvs功能

  • 所有节点操作
# Install ipset and the ipvsadm management tool
dnf install ipset ipvsadm -y
# Persist the list of IPVS-related modules to load at every boot
cat <<EOF | sudo tee /etc/modules-load.d/ipvs.conf
overlay
ip_vs
ip_vs_rr
ip_vs_wrr
ip_vs_sh
nf_conntrack
EOF

modprobe overlay
modprobe ip_vs && modprobe ip_vs_rr && modprobe ip_vs_wrr && modprobe ip_vs_sh && modprobe nf_conntrack
 
#查看模块是否加载成功
[root@k8s-master ~]# lsmod | grep -e ip_vs -e nf_conntrack_ipv4
ip_vs_sh               12288  0
ip_vs_wrr              12288  0
ip_vs_rr               12288  6
ip_vs                 184320  12 ip_vs_rr,ip_vs_sh,ip_vs_wrr
nf_conntrack          200704  7 xt_conntrack,nf_nat,xt_nat,nf_conntrack_netlink,xt_CT,xt_MASQUERADE,ip_vs
nf_defrag_ipv6         24576  2 nf_conntrack,ip_vs
libcrc32c              12288  5 nf_conntrack,nf_nat,nf_tables,xfs,ip_vs

安装Docker

  • 所有节点操作

配置仓库源

# Add the docker-ce repository (Aliyun-hosted copy of the upstream repo file)
dnf config-manager --add-repo http://mirrors.aliyun.com/docker-ce/linux/centos/docker-ce.repo  
# Rewrite the repo URLs to point at the Aliyun mirror
sed -i 's+download.docker.com+mirrors.aliyun.com/docker-ce+' /etc/yum.repos.d/docker-ce.repo
# Refresh repo metadata
dnf makecache 

安装docker-ce

# Install the latest docker-ce from the repo configured above
dnf install docker-ce -y

修改配置文件

# Write the Docker daemon config: registry mirrors, log rotation, and the
# systemd cgroup driver (must match the kubelet's cgroup driver).
cat > /etc/docker/daemon.json <<EOF
{
 "registry-mirrors": [
    "https://dockerhub.azk8s.cn",
    "https://docker.mirrors.ustc.edu.cn",
    "http://hub-mirror.c.163.com",
    "https://mirror.ccs.tencentyun.com",
    "https://nfvzt07v.mirror.aliyuncs.com",
	  "https://ba301968e4af4e539e8a64abc60c9ff5.mirror.swr.myhuaweicloud.com"
  ],
  "max-concurrent-downloads": 10,
  "log-driver": "json-file",
  "log-level": "warn",
  "log-opts": {
    "max-size": "10m",
    "max-file": "3"
    },
  "data-root": "/var/lib/docker",
  "exec-opts": ["native.cgroupdriver=systemd"]
}
EOF

重启docker服务

# Reload unit files, restart docker with the new config, and enable it at boot
systemctl daemon-reload && systemctl restart docker && systemctl enable docker

安装运行时环境

  • 所有节点操作

下载cri-docker

# Download the cri-dockerd release matching your platform (amd64/arm64) from:
#   https://github.com/Mirantis/cri-dockerd/releases
# (The URL was a bare line in the original, which bash would try to execute.)
# Unpack the archive
tar -xf cri-dockerd-0.3.16.arm64.tgz
# Install the binary and make it executable
cp cri-dockerd/cri-dockerd /usr/bin/
chmod +x /usr/bin/cri-dockerd

配置cri-docker服务

# Install the cri-docker systemd service unit. The quoted "EOF" delimiter
# prevents shell expansion, so $MAINPID reaches the unit file literally.
cat <<"EOF" > /usr/lib/systemd/system/cri-docker.service
[Unit]
Description=CRI Interface for Docker Application Container Engine
Documentation=https://docs.mirantis.com
After=network-online.target firewalld.service docker.service
Wants=network-online.target
Requires=cri-docker.socket
[Service]
Type=notify
ExecStart=/usr/bin/cri-dockerd --network-plugin=cni --pod-infra-container-image=registry.aliyuncs.com/google_containers/pause:3.10
ExecReload=/bin/kill -s HUP $MAINPID
TimeoutSec=0
RestartSec=2
Restart=always
StartLimitBurst=3
StartLimitInterval=60s
LimitNOFILE=infinity
LimitNPROC=infinity
LimitCORE=infinity
TasksMax=infinity
Delegate=yes
KillMode=process
[Install]
WantedBy=multi-user.target
EOF

添加cri-docker套接字

# Install the cri-docker socket unit; %t expands (by systemd, not the shell)
# to the runtime directory, i.e. /run/cri-dockerd.sock.
cat <<"EOF" > /usr/lib/systemd/system/cri-docker.socket
[Unit]
Description=CRI Docker Socket for the API
PartOf=cri-docker.service
[Socket]
ListenStream=%t/cri-dockerd.sock
SocketMode=0660
SocketUser=root
SocketGroup=docker
[Install]
WantedBy=sockets.target
EOF

启动cri-docker对应服务

systemctl daemon-reload
# Enable at boot
systemctl enable cri-docker
# Start now
systemctl start cri-docker
# Check status
systemctl is-active cri-docker # prints "active" when the service started correctly

# If startup failed, inspect the service log
journalctl -u cri-docker

安装k8s集群

配置仓库源

  • 所有节点操作

  • 阿里源配置参考地址:https://developer.aliyun.com/mirror/kubernetes?spm=a2c6h.13651102.0.0.362f1b11DDkjO1

# 配置1.32版本阿里源
cat <<EOF | tee /etc/yum.repos.d/kubernetes.repo
[kubernetes]
name=Kubernetes
baseurl=https://mirrors.aliyun.com/kubernetes-new/core/stable/v1.32/rpm/
enabled=1
gpgcheck=1
gpgkey=https://mirrors.aliyun.com/kubernetes-new/core/stable/v1.32/rpm/repodata/repomd.xml.key
EOF

安装k8s

  • 所有节点操作
# Install kubelet, kubeadm and kubectl
dnf install -y kubelet kubeadm kubectl
# Enable kubelet at boot and start it
systemctl enable --now  kubelet
# List the images kubeadm will need
kubeadm config images list --image-repository=registry.aliyuncs.com/google_containers
# Pre-pull the images; --cri-socket selects the container runtime used for the pull
kubeadm config images pull --image-repository=registry.aliyuncs.com/google_containers --cri-socket unix:///var/run/cri-dockerd.sock

初始化k8s集群

  • master节点操作
# Initialize the control plane. --pod-network-cidr must match the cidr set
# later in calico's custom-resources.yaml; --cri-socket selects cri-dockerd.
kubeadm init --kubernetes-version=1.32.1 \
--apiserver-advertise-address=192.168.31.55 \
--image-repository registry.aliyuncs.com/google_containers \
--service-cidr=10.96.0.0/12 \
--pod-network-cidr=10.244.0.0/16 \
--ignore-preflight-errors=Swap \
--cri-socket=unix:///var/run/cri-dockerd.sock

# 初始化成功会显示如下信息
Your Kubernetes control-plane has initialized successfully!

To start using your cluster, you need to run the following as a regular user:

  mkdir -p $HOME/.kube
  sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
  sudo chown $(id -u):$(id -g) $HOME/.kube/config

Alternatively, if you are the root user, you can run:

  export KUBECONFIG=/etc/kubernetes/admin.conf

You should now deploy a pod network to the cluster.
Run "kubectl apply -f [podnetwork].yaml" with one of the options listed at:
  https://kubernetes.io/docs/concepts/cluster-administration/addons/

Then you can join any number of worker nodes by running the following on each as root:

kubeadm join 192.168.31.55:6443 --token xxxxx \
        --discovery-token-ca-cert-hash sha256:xxxx
  • 集群初始化可选配置参数
参数:说明
--apiserver-advertise-address:指定API Server地址
--apiserver-bind-port:指定绑定的API Server端口,默认值为6443
--ignore-preflight-errors:忽视检查项错误列表,例如IsPrivilegedUser,Swap,如填写为 all 则将忽视所有的检查项错误
--kubernetes-version:指定Kubernetes版本
--pod-network-cidr:指定pod网络IP地址段
--service-cidr:指定service的IP地址段
--service-dns-domain:指定Service的域名,默认为cluster.local
--token:指定token
--token-ttl:指定token有效时间,如果设置为0,则永不过期

设置k8s配置文件

  • master节点操作
# Install the admin kubeconfig for the current user
mkdir -p $HOME/.kube
sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
sudo chown $(id -u):$(id -g) $HOME/.kube/config

# Copy the kubeconfig to the worker nodes so kubectl works there too.
# NOTE(review): this ships cluster-admin credentials to every node —
# convenient for a lab, not recommended for production clusters.
scp -r $HOME/.kube k8s-node01:$HOME/
scp -r $HOME/.kube k8s-node02:$HOME/

# Check control-plane component health
kubectl get componentstatuses
# 输出如下信息
NAME                 STATUS    MESSAGE   ERROR
scheduler            Healthy   ok        
controller-manager   Healthy   ok        
etcd-0               Healthy   ok   

从节点加入集群

  • 所有从节点执行
# When using cri-dockerd, append --cri-socket=unix:///var/run/cri-dockerd.sock
# to the "kubeadm join" command printed by "kubeadm init" on the master.
kubeadm join 192.168.31.55:6443 --token u43btt.zeer6r3fwlg04df1 \
        --discovery-token-ca-cert-hash sha256:f0f3e18955e51d8a251c31b20b301d95fea1503b8e7b12e109d6d844f7db29f9 --cri-socket unix:///var/run/cri-dockerd.sock
  • 主节点查看状态
# On the master, list the nodes to confirm the workers joined
kubectl get nodes

# 显示如下
[root@k8s-master ~]# kubectl get nodes
NAME         STATUS       ROLES           AGE     VERSION
k8s-master   NotReady    control-plane   5h33m   v1.32.1
k8s-node01   NotReady    <none>          5h29m   v1.32.1
k8s-node02   NotReady    <none>          5h29m   v1.32.1
# 当前集群是NotReady状态,还需要配置网络后才可用

设置k8s网络

  • master节点操作
  • 使用calico插件配置网络

安装Tigera Calico

  • 官方网址:https://archive-os-3-26.netlify.app/calico/3.26/getting-started/kubernetes/quickstart/
  • 打开文档中给定的最新版本地址,下载tigera-operator.yaml文件
  • 打开文档中给定的最新版本地址,下载custom-resources.yaml文件
# Example for calico v3.26.5
wget https://raw.githubusercontent.com/projectcalico/calico/v3.26.5/manifests/tigera-operator.yaml
wget https://raw.githubusercontent.com/projectcalico/calico/v3.26.5/manifests/custom-resources.yaml
  • 安装tigera calico
# Create the tigera operator resources
kubectl create -f tigera-operator.yaml
# Check that the operator pod reaches Running
kubectl get pods -n tigera-operator
# 输出如下内容表示创建成功
[root@k8s-master ~]# kubectl get pods -n tigera-operator
NAME                               READY   STATUS    RESTARTS   AGE
tigera-operator-7d68577dc5-b99xh   1/1     Running   0          5h39m

安装calico

  • 修改custom-resources.yaml配置文件
# Set the pod CIDR in custom-resources.yaml to the --pod-network-cidr value
# used at "kubeadm init" (10.244.0.0/16); the file ships with 192.168.0.0/16.
# (The original showed a manual vim edit with bare yaml lines; sed is scriptable.)
sed -i 's|cidr: 192.168.0.0/16|cidr: 10.244.0.0/16|' custom-resources.yaml
  • 创建资源
# Apply the calico installation custom resources
kubectl apply -f custom-resources.yaml
  • 验证
# Verify the calico-system namespace was created
kubectl get ns
# 输出如下内容表示创建成功
[root@k8s-master ~]# kubectl get ns 
NAME               STATUS   AGE
calico-apiserver   Active   5h44m
calico-system      Active   5h44m
default            Active   5h50m
kube-node-lease    Active   5h50m
kube-public        Active   5h50m
kube-system        Active   5h50m
tigera-operator    Active   5h45m


# Wait for all calico-system pods to become Running
kubectl get pod -n calico-system
# 输出如下内容表示创建完成
[root@k8s-master ~]# kubectl get pod -n calico-system  
NAME                                       READY   STATUS    RESTARTS   AGE
calico-kube-controllers-7b9949b59d-krp9n   1/1     Running   0          5h45m
calico-node-cd4wv                          1/1     Running   0          5h45m
calico-node-lss9l                          1/1     Running   0          5h45m
calico-node-ltxqf                          1/1     Running   0          5h45m
calico-typha-d5b6c59d8-84cqn               1/1     Running   0          5h45m
calico-typha-d5b6c59d8-w6xbw               1/1     Running   0          5h45m
csi-node-driver-hk2jf                      2/2     Running   0          5h45m
csi-node-driver-hkwkf                      2/2     Running   0          5h45m
csi-node-driver-jr94w                      2/2     Running   0          5h45m


# All nodes should now report Ready
kubectl get nodes
# 输出如下内容即为成功
[root@k8s-master ~]# kubectl get nodes
NAME         STATUS   ROLES           AGE     VERSION
k8s-master   Ready    control-plane   5h53m   v1.32.1
k8s-node01   Ready    <none>          5h50m   v1.32.1
k8s-node02   Ready    <none>          5h49m   v1.32.1
  • 可能出现的问题
    • 未创建calico-system命名空间,可能是cidr网段修改不正确
    • calico-node READY 列是0/1,可以使用kubectl describe pod pod名称 -n calico-system查看具体原因
    • 如果镜像下载失败,可以手动到https://github.com/projectcalico/calico/releases/tag/v3.29.1下载最新版本的release-v3.29.1.tgz文件,解压后进入image镜像文件夹后通过docker load -i 文件名导入镜像

配置k8s使用ipvs

  • master节点操作
# Edit the kube-proxy ConfigMap and change mode: "" to mode: "ipvs"
kubectl edit configmaps kube-proxy  -n kube-system

49     kind: KubeProxyConfiguration
50     logging:
51       flushFrequency: 0
52       options:
53         json:
54           infoBufferSize: "0"
55         text:
56           infoBufferSize: "0"
57       verbosity: 0
58     metricsBindAddress: ""
59     mode: "ipvs" # 将mode: ""修改为mode: "ipvs"
60     nftables:
61       masqueradeAll: false
62       masqueradeBit: null
63       minSyncPeriod: 0s
64       syncPeriod: 0s
65     nodePortAddresses: null
66     oomScoreAdj: null
67     portRange: ""
68     showHiddenMetricsForVersion: ""
69     winkernel:
70       enableDSR: false
71       forwardHealthCheckVip: false
72       networkName: ""
73       rootHnsEndpointName: ""
74       sourceVip: ""


# Delete the kube-proxy pods; the DaemonSet recreates them with the new mode
kubectl delete pod -l k8s-app=kube-proxy  -n kube-system 

# Verify IPVS virtual servers now exist
ipvsadm -ln
# 输出如下内容
[root@k8s-master ~]# ipvsadm -ln
IP Virtual Server version 1.2.1 (size=4096)
Prot LocalAddress:Port Scheduler Flags
  -> RemoteAddress:Port           Forward Weight ActiveConn InActConn
TCP  10.96.0.1:443 rr
  -> 192.168.31.55:6443           Masq    1      0          0         
TCP  10.96.0.10:53 rr
  -> 10.244.85.193:53             Masq    1      0          0         
  -> 10.244.85.195:53             Masq    1      0          0         
TCP  10.96.0.10:9153 rr
  -> 10.244.85.193:9153           Masq    1      0          0         
  -> 10.244.85.195:9153           Masq    1      0          0         
TCP  10.104.117.81:5473 rr
  -> 192.168.31.86:5473           Masq    1      0          0         
  -> 192.168.31.195:5473          Masq    1      0          0         
TCP  10.106.176.144:443 rr
  -> 10.244.85.194:5443           Masq    1      0          0         
  -> 10.244.85.196:5443           Masq    1      0          0         
UDP  10.96.0.10:53 rr
  -> 10.244.85.193:53             Masq    1      0          0         
  -> 10.244.85.195:53             Masq    1      0          0