【Kubernetes】实战记录:集群搭建

389 阅读5分钟

1 环境配置

1.1 节点准备

系统类型IP地址节点角色CPUMemoryHostname
centos-7.3192.168.3.180master>=2>=2GBmaster1
centos-7.3192.168.3.112master>=2>=2GBmaster2
centos-7.3192.168.3.183master>=2>=2GBmaster3
centos-7.3192.168.3.182worker>=2>=2GBworker1
centos-7.3192.168.3.181worker>=2>=2GBworker2

1.2 节点配置

设置主机名:主机名必须每个节点都不一样,并且保证所有节点之间可以通过hostname互相访问。

# 查看主机名
$ hostname
# 修改主机名
$ hostnamectl set-hostname <your_hostname>
# 配置host,使所有节点之间可以通过hostname互相访问

修改host文件

$ vi /etc/hosts
# <node-ip> <node-hostname>

安装依赖

# 更新yum
$ yum update
# 安装依赖包
$ yum install -y conntrack ipvsadm ipset jq sysstat curl iptables libseccomp

其他配置

# 关闭防火墙
$ systemctl stop firewalld && systemctl disable firewalld
# 重置iptables
$ iptables -F && iptables -X && iptables -F -t nat && iptables -X -t nat && iptables -P FORWARD ACCEPT
# 关闭swap
$ swapoff -a
$ sed -i '/swap/s/^\(.*\)$/#\1/g' /etc/fstab
# 关闭selinux
$ setenforce 0
# 关闭dnsmasq(否则可能导致docker容器无法解析域名)
$ service dnsmasq stop && systemctl disable dnsmasq

系统参数配置

# 制作配置文件
$ cat > /etc/sysctl.d/kubernetes.conf <<EOF
net.bridge.bridge-nf-call-iptables=1
net.bridge.bridge-nf-call-ip6tables=1
net.ipv4.ip_forward=1
vm.swappiness=0
vm.overcommit_memory=1
vm.panic_on_oom=0
fs.inotify.max_user_watches=89100
EOF
# 生效文件
$ sysctl -p /etc/sysctl.d/kubernetes.conf

坑1:cannot stat /proc/sys/net/bridge/bridge-nf-call-iptables: No such file or directory

解决方法:modprobe br_netfilter

2 安装Docker

$ yum install https://download.docker.com/linux/centos/7/x86_64/stable/Packages/docker-ce-selinux-17.03.1.ce-1.el7.centos.noarch.rpm
$ yum install https://download.docker.com/linux/centos/7/x86_64/stable/Packages/docker-ce-17.03.1.ce-1.el7.centos.x86_64.rpm
# 开机启动
$ systemctl enable docker
# 设置参数
# 1.查看磁盘挂载
$ df -h
Filesystem      Size  Used Avail Use% Mounted on
/dev/sda2        98G  2.8G   95G   3% /
devtmpfs         63G     0   63G   0% /dev
/dev/sda5      1015G  8.8G 1006G   1% /tol
/dev/sda1       197M  161M   37M  82% /boot
# 2.设置docker启动参数
# - 设置docker数据目录:选择比较大的分区(我这里是根目录就不需要配置了,默认为/var/lib/docker)
# - 设置cgroup driver(默认是cgroupfs,主要目的是与kubelet配置统一,这里也可以不设置后面在kubelet中指定cgroupfs)
$ cat <<EOF > /etc/docker/daemon.json
{
    "graph": "/docker/data/path",
    "exec-opts": ["native.cgroupdriver=systemd"]
}
EOF
# 启动docker服务
service docker restart

3 安装工具

# 配置yum源
$ cat <<EOF > /etc/yum.repos.d/kubernetes.repo
[kubernetes]
name=Kubernetes
baseurl=http://mirrors.aliyun.com/kubernetes/yum/repos/kubernetes-el7-x86_64
enabled=1
gpgcheck=0
repo_gpgcheck=0
gpgkey=http://mirrors.aliyun.com/kubernetes/yum/doc/yum-key.gpg
       http://mirrors.aliyun.com/kubernetes/yum/doc/rpm-package-key.gpg
EOF

# 安装工具
# 找到要安装的版本号
$ yum list kubeadm --showduplicates | sort -r

# 安装指定版本(这里用的是1.14.0)
$ yum install -y kubeadm-1.14.0-0 kubelet-1.14.0-0 kubectl-1.14.0-0 --disableexcludes=kubernetes

# 查找kubelet.service.d的位置
$ find / -name "kubelet*" 

# 设置kubelet的cgroupdriver(kubelet的cgroupdriver默认为systemd,如果上面没有设置docker的exec-opts为systemd,这里就需要将kubelet的设置为cgroupfs)
$ sed -i "s/cgroup-driver=systemd/cgroup-driver=cgroupfs/g" /usr/lib/systemd/system/kubelet.service.d/10-kubeadm.conf

# 启动kubelet
$ systemctl enable kubelet && systemctl start kubelet

坑2:缺少依赖 kubernetes-cni(需要 0.7.5 版本)。解决方法:yum -y install kubernetes-cni-0.7.5

4 准备配置文件(Master1)

$ cd ~ && git clone https://gitee.com/admxj/kubernetes-ha-kubeadm.git

仓库目录说明:

addons:kubernetes的插件,比如calico和dashboard。

configs:包含了部署集群过程中用到的各种配置文件。

scripts:包含部署集群过程中用到的脚本,如keepalive检查脚本。

global-configs.properties:全局配置,包含各种易变的配置内容。

init.sh:初始化脚本,配置好global-config之后,会自动生成所有配置文件。

#kubernetes版本
VERSION=v1.14.0

#POD网段
POD_CIDR=172.22.0.0/16

#master虚拟ip(需与本环境网段一致,本文实际部署使用192.168.3.199)
MASTER_VIP=192.168.3.199

#keepalived用到的网卡接口名(按实际网卡名填写,本文环境为ens1)
VIP_IF=ens1
# 生成配置文件,确保执行过程没有异常信息
$ ./init.sh

# 查看生成的配置文件,确保脚本执行成功
$ find target/ -type f

坑3:-bash: ./init.sh: Permission denied

chmod +x init.sh  #(赋予可执行权限即可,无需使用过宽的777)

[root@jxd-master1 kubernetes-ha-kubeadm]# ./init.sh
====替换变量列表====
VERSION=v1.14.0
POD_CIDR=172.22.0.0/16
MASTER_VIP=192.168.3.199
VIP_IF=ens1

====替换脚本====
scripts/check-apiserver.sh

====替换配置文件====
configs/keepalived-backup.conf
configs/keepalived-master.conf
configs/kubeadm-config.yaml
addons/calico-rbac-kdd.yaml
addons/calico.yaml
addons/dashboard-all.yaml
配置生成成功,位置: /root/kubernetes-ha-kubeadm/target

5 搭建高可用集群(Master1、Master2)

Master1作为主节点,Master2作为备用节点

5.1 安装keepalived

yum install -y keepalived

创建keepalived配置文件,使用xshell工具将命令发送到所有会话

# 创建目录
$ mkdir -p /etc/keepalived

# 分发配置文件
$ cp target/configs/keepalived-master.conf /etc/keepalived/keepalived.conf

# 分发监测脚本
$ cp target/scripts/check-apiserver.sh /etc/keepalived/

keepalived配置文件:

! Configuration File for keepalived
global_defs {
 router_id keepalive-master
}

vrrp_script check_apiserver {
 script "/etc/keepalived/check-apiserver.sh"
 interval 3 # 每隔3s执行一次
 weight -2 # 权重-2
}

vrrp_instance VI-kube-master {
   state MASTER
   interface ens1
   virtual_router_id 68
   priority 100
   dont_track_primary
   advert_int 3
   virtual_ipaddress { # api server
     192.168.3.199
   }
   track_script { # 检查的脚本
       check_apiserver
   }
}

5.2 启动keepalived

# 分别在master和backup上启动服务
$ systemctl enable keepalived && service keepalived start

# 检查状态
$ service keepalived status

# 查看日志
$ journalctl -f -u keepalived

# 查看虚拟ip
$ ip a

keepalived.service处于active的状态 image.png

虚拟IP绑定到了ens1网卡上 image.png

可以ping通虚拟IP image.png

5.3 部署第一个主节点

# 准备配置文件
$ cp target/configs/kubeadm-config.yaml ~
# ssh到第一个主节点,执行kubeadm初始化系统(注意保存最后打印的加入集群的命令)
$ kubeadm init --config=kubeadm-config.yaml --experimental-upload-certs

# copy kubectl配置(上一步会有提示)
$ mkdir -p ~/.kube
$ cp -i /etc/kubernetes/admin.conf ~/.kube/config

# 测试一下kubectl
$ kubectl get pods --all-namespaces

注意:需要将init打印的join命令保存下来,之后加入worker节点的时候需要使用到。

kubeadm join 192.168.3.199:6443 --token 8qn6rx.rnariugyxjsd642z \
    --discovery-token-ca-cert-hash sha256:4fd89120cc158a17b9bb898a1cebb879dba88795904a4b9c7de8f66718effa9d \
    --experimental-control-plane --certificate-key 1d18e633630307ac7d4b8ca7c8bec4dbb03fba7be7e6b6270a101fca4d21e651

坑4:kubeadm初始化系统,报错:版本不匹配

error execution phase preflight: [preflight] Some fatal errors occurred:
	[ERROR KubeletVersion]: the kubelet version is higher than the control plane version. This is not a supported version skew and may lead to a malfunctional cluster. Kubelet version: "1.20.5" Control plane version: "1.14.0"
[preflight] If you know what you are doing, you can make a check non-fatal with `--ignore-preflight-errors=...`

解决方法:重新安装kubelet(奇怪的是之前安装的时候明明指定了kubelet的版本)

yum -y remove kubelet
yum -y install kubelet-1.14.0 kubeadm-1.14.0

image.png dns是pending状态,其他都是running状态,因为dns是运行在工作节点上的。

使用curl命令请求一下健康检查接口,返回ok代表没问题

[root@jxd-master1 ~]# curl -k https://192.168.3.199:6443/healthz
ok

6 复制配置文件

其他节点需要用到的证书文件:/etc/kubernetes/admin.conf & /etc/kubernetes/pki

  • ca.crt
  • ca.key
  • sa.key
  • sa.pub
  • front-proxy-ca.crt
  • front-proxy-ca.key
  • etcd/ca.crt
  • etcd/ca.key

拷贝主节点的文件到其他节点:

scp -r root@jxd-master1:/etc/kubernetes/pki/ .
scp root@jxd-master1:/etc/kubernetes/admin.conf .

删除其他文件:
[root@jxd-master2 pki]# rm -f apiserver*
[root@jxd-master2 pki]# rm -f front-proxy-client.*
[root@jxd-master2 pki]# rm -f etcd/healthcheck-client.* etcd/peer.* etcd/server.*

7 部署第二个主节点

第二个主节点的配置脚本文件:/root/kubernetes-ha-kubeadm/target/scripts/init-master-second.sh

#!/bin/bash
# init-master-second.sh — bootstrap the second control-plane node.
# Run on master2 after copying the CA/SA certs from master1.
# Steps: generate certs & kubeconfigs, start kubelet, register the local
# etcd member with the existing cluster, then deploy control-plane components.
# NOTE(review): `kubeadm alpha phase ...` was renamed to `kubeadm init phase ...`
# in kubeadm v1.14 — verify against the kubeadm version actually installed.

# kubelet bootstrap configuration
kubeadm alpha phase certs all --config kubeadm-config.yaml
kubeadm alpha phase kubelet config write-to-disk --config kubeadm-config.yaml
kubeadm alpha phase kubelet write-env-file --config kubeadm-config.yaml
kubeadm alpha phase kubeconfig kubelet --config kubeadm-config.yaml
systemctl start kubelet

sleep 2

# Join the etcd cluster: add this node as a member via the etcd pod on master1.
# Fixed: the etcdctl (v2 API) TLS flag is --ca-file; the original had a typo (--cd-file).
export KUBECONFIG=/etc/kubernetes/admin.conf
kubectl exec -n kube-system etcd-jxd-master1 -- etcdctl --ca-file /etc/kubernetes/pki/etcd/ca.crt --cert-file /etc/kubernetes/pki/etcd/peer.crt --key-file /etc/kubernetes/pki/etcd/peer.key --endpoints=https://192.168.3.180:2379 member add jxd-master2 https://192.168.3.112:2380
sleep 2
kubeadm alpha phase etcd local --config kubeadm-config.yaml

sleep 3
# Deploy the control-plane components and mark this node as a master.
kubeadm alpha phase kubeconfig all --config kubeadm-config.yaml
kubeadm alpha phase controlplane all --config kubeadm-config.yaml
kubeadm alpha phase mark-master --config kubeadm-config.yaml

image.png

# 上传生成的初始化脚本
$ scp target/scripts/init-master-second.sh root@jxd-master2:~

# 在第二个master节点执行初始化脚本
$ sh ~/init-master-second.sh

# 查看节点运行情况
$ netstat -ntlp
$ docker ps
$ journalctl -f

# 配置kubectl
$ mkdir -p ~/.kube
$ mv /etc/kubernetes/admin.conf ~/.kube/config

8 部署网络插件

[root@jxd-master1 addons]# kubectl apply -f /etc/kubernetes/addons/calico-rbac-kdd.yaml
[root@jxd-master1 addons]# kubectl apply -f /etc/kubernetes/addons/calico.yaml

查看运行状态: image.png

9 加入worker节点

# 使用之前保存的join命令加入集群
# 注意:worker节点加入时不要带--experimental-control-plane和--certificate-key参数,
# 那两个参数只用于加入控制平面(master)节点
$ kubeadm join 192.168.3.199:6443 --token 8qn6rx.rnariugyxjsd642z \
    --discovery-token-ca-cert-hash sha256:4fd89120cc158a17b9bb898a1cebb879dba88795904a4b9c7de8f66718effa9d
    
    
# 耐心等待一会,并观察日志
$ journalctl -f

# 查看节点
$ kubectl get nodes

坑:the server doesn't have a resource type "nodes"

run:kubectl get nodes

ERROR:
the server doesn't have a resource type "nodes"

解决方法:
sudo cp /etc/kubernetes/admin.conf ~/.kube/config

坑:Unable to connect to the server: x509: certificate signed by unknown authority (possibly because of "crypto/rsa: verification error" while trying to verify candidate authority certificate "kubernetes") 解决方法:blog.csdn.net/woay2008/ar…

10 集群可用性测试

10.1 创建nginx ds

 # 写入配置
$ cat > nginx-ds.yml <<EOF
apiVersion: v1
kind: Service
metadata:
  name: nginx-ds
  labels:
    app: nginx-ds
spec:
  type: NodePort
  selector:
    app: nginx-ds
  ports:
  - name: http
    port: 80
    targetPort: 80
---
apiVersion: extensions/v1beta1
kind: DaemonSet
metadata:
  name: nginx-ds
  labels:
    addonmanager.kubernetes.io/mode: Reconcile
spec:
  template:
    metadata:
      labels:
        app: nginx-ds
    spec:
      containers:
      - name: my-nginx
        image: nginx:1.7.9
        ports:
        - containerPort: 80
EOF

# 创建ds
$ kubectl create -f nginx-ds.yml

10.2 检查连通性

# 检查各 Node 上的 Pod IP 连通性
$ kubectl get pods  -o wide

# 在每个节点上ping pod ip
$ ping <pod-ip>

# 检查service可达性
$ kubectl get svc

# 在每个节点上访问服务
$ curl <service-ip>:<port>

# 在每个节点检查node-port可用性
$ curl <node-ip>:<port>

10.3 检查DNS可用性

# 创建一个nginx pod
$ cat > pod-nginx.yaml <<EOF
apiVersion: v1
kind: Pod
metadata:
  name: nginx
spec:
  containers:
  - name: nginx
    image: nginx:1.7.9
    ports:
    - containerPort: 80
EOF

# 创建pod
$ kubectl create -f pod-nginx.yaml

# 进入pod,查看dns
$ kubectl exec  nginx -i -t -- /bin/bash

# 查看dns配置
root@nginx:/# cat /etc/resolv.conf

# 查看名字是否可以正确解析
root@nginx:/# ping nginx-ds

参考资料