1 环境配置
1.1 节点准备
系统类型 | IP地址 | 节点角色 | CPU | Memory | Hostname |
---|---|---|---|---|---|
centos-7.3 | 192.168.3.180 | master | >=2 | >=2GB | master1 |
centos-7.3 | 192.168.3.112 | master | >=2 | >=2GB | master2 |
centos-7.3 | 192.168.3.183 | master | >=2 | >=2GB | master3 |
centos-7.3 | 192.168.3.182 | worker | >=2 | >=2GB | worker1 |
centos-7.3 | 192.168.3.181 | worker | >=2 | >=2GB | worker2 |
1.2 节点配置
设置主机名:主机名必须每个节点都不一样,并且保证所有节点之间可以通过hostname互相访问。
# 查看主机名
$ hostname
# 修改主机名
$ hostnamectl set-hostname <your_hostname>
# 配置host,使所有节点之间可以通过hostname互相访问
修改host文件
$ vi /etc/hosts
# <node-ip> <node-hostname>
安装依赖
# 更新yum
$ yum update
# 安装依赖包
$ yum install -y conntrack ipvsadm ipset jq sysstat curl iptables libseccomp
其他配置
# 关闭防火墙
$ systemctl stop firewalld && systemctl disable firewalld
# 重置iptables
$ iptables -F && iptables -X && iptables -F -t nat && iptables -X -t nat && iptables -P FORWARD ACCEPT
# 关闭swap
$ swapoff -a
$ sed -i '/swap/s/^\(.*\)$/#\1/g' /etc/fstab
# 关闭selinux
$ setenforce 0
# 关闭dnsmasq(否则可能导致docker容器无法解析域名)
$ service dnsmasq stop && systemctl disable dnsmasq
系统参数配置
# 制作配置文件
$ cat > /etc/sysctl.d/kubernetes.conf <<EOF
net.bridge.bridge-nf-call-iptables=1
net.bridge.bridge-nf-call-ip6tables=1
net.ipv4.ip_forward=1
vm.swappiness=0
vm.overcommit_memory=1
vm.panic_on_oom=0
fs.inotify.max_user_watches=89100
EOF
# 生效文件
$ sysctl -p /etc/sysctl.d/kubernetes.conf
坑1:cannot stat /proc/sys/net/bridge/bridge-nf-call-iptables: No such file or directory
解决方法:modprobe br_netfilter
2 安装Docker
$ yum install https://download.docker.com/linux/centos/7/x86_64/stable/Packages/docker-ce-selinux-17.03.1.ce-1.el7.centos.noarch.rpm
$ yum install https://download.docker.com/linux/centos/7/x86_64/stable/Packages/docker-ce-17.03.1.ce-1.el7.centos.x86_64.rpm
# 开机启动
$ systemctl enable docker
# 设置参数
# 1.查看磁盘挂载
$ df -h
Filesystem Size Used Avail Use% Mounted on
/dev/sda2 98G 2.8G 95G 3% /
devtmpfs 63G 0 63G 0% /dev
/dev/sda5 1015G 8.8G 1006G 1% /tol
/dev/sda1 197M 161M 37M 82% /boot
# 2.设置docker启动参数
# - 设置docker数据目录:选择比较大的分区(我这里是根目录就不需要配置了,默认为/var/lib/docker)
# - 设置cgroup driver(默认是cgroupfs,主要目的是与kubelet配置统一,这里也可以不设置后面在kubelet中指定cgroupfs)
$ cat <<EOF > /etc/docker/daemon.json
{
"graph": "/docker/data/path",
"exec-opts": ["native.cgroupdriver=systemd"]
}
EOF
# 启动docker服务
service docker restart
3 安装工具
# 配置yum源
$ cat <<EOF > /etc/yum.repos.d/kubernetes.repo
[kubernetes]
name=Kubernetes
baseurl=http://mirrors.aliyun.com/kubernetes/yum/repos/kubernetes-el7-x86_64
enabled=1
gpgcheck=0
repo_gpgcheck=0
gpgkey=http://mirrors.aliyun.com/kubernetes/yum/doc/yum-key.gpg
       http://mirrors.aliyun.com/kubernetes/yum/doc/rpm-package-key.gpg
EOF
# 安装工具
# 找到要安装的版本号
$ yum list kubeadm --showduplicates | sort -r
# 安装指定版本(这里用的是1.14.0)
$ yum install -y kubeadm-1.14.0-0 kubelet-1.14.0-0 kubectl-1.14.0-0 --disableexcludes=kubernetes
# 查找kubelet.service.d的位置
$ find / -name "kubelet*"
# 设置kubelet的cgroupdriver(kubelet的cgroupdriver默认为systemd,如果上面没有设置docker的exec-opts为systemd,这里就需要将kubelet的设置为cgroupfs)
$ sed -i "s/cgroup-driver=systemd/cgroup-driver=cgroupfs/g" /usr/lib/systemd/system/kubelet.service.d/10-kubeadm.conf
# 启动kubelet
$ systemctl enable kubelet && systemctl start kubelet
坑2:缺少依赖
yum -y install kubernetes-cni-0.7.5
4 准备配置文件(Master1)
$ cd ~ && git clone https://gitee.com/admxj/kubernetes-ha-kubeadm.git
addons:kubernetes的插件,比如calico和dashboard。
configs:包含了部署集群过程中用到的各种配置文件。
scripts:包含部署集群过程中用到的脚本,如keepalive检查脚本。
global-configs.properties:全局配置,包含各种易变的配置内容。
init.sh:初始化脚本,配置好global-config之后,会自动生成所有配置文件。
#kubernetes版本
VERSION=v1.14.0
#POD网段
POD_CIDR=172.22.0.0/16
#master虚拟ip
MASTER_VIP=192.168.8.188
#keepalived用到的网卡接口名
VIP_IF=ens33
# 生成配置文件,确保执行过程没有异常信息
$ ./init.sh
# 查看生成的配置文件,确保脚本执行成功
$ find target/ -type f
坑3:-bash: ./init.sh: Permission denied
chmod 777 init.sh
[root@jxd-master1 kubernetes-ha-kubeadm]# ./init.sh
====替换变量列表====
VERSION=v1.14.0
POD_CIDR=172.22.0.0/16
MASTER_VIP=192.168.3.199
VIP_IF=ens1
====替换脚本====
scripts/check-apiserver.sh
====替换配置文件====
configs/keepalived-backup.conf
configs/keepalived-master.conf
configs/kubeadm-config.yaml
addons/calico-rbac-kdd.yaml
addons/calico.yaml
addons/dashboard-all.yaml
配置生成成功,位置: /root/kubernetes-ha-kubeadm/target
5 搭建高可用集群(Master1、Master2)
Master1作为主节点,Master2作为备用节点
5.1 安装keepalived
yum install -y keepalived
创建keepalived配置文件,使用xshell工具将命令发送到所有会话
# 创建目录
$ mkdir -p /etc/keepalived
# 分发配置文件
$ cp target/configs/keepalived-master.conf /etc/keepalived/keepalived.conf
# 分发监测脚本
$ cp target/scripts/check-apiserver.sh /etc/keepalived/
keepalived配置文件:
! Configuration File for keepalived
global_defs {
router_id keepalive-master
}
vrrp_script check_apiserver {
script "/etc/keepalived/check-apiserver.sh"
interval 3 # 每隔3s执行一次
weight -2 # 权重-2
}
vrrp_instance VI-kube-master {
state MASTER
interface ens1
virtual_router_id 68
priority 100
dont_track_primary
advert_int 3
virtual_ipaddress { # api server
192.168.3.199
}
track_script { # 检查的脚本
check_apiserver
}
}
5.2 启动keepalived
# 分别在master和backup上启动服务
$ systemctl enable keepalived && service keepalived start
# 检查状态
$ service keepalived status
# 查看日志
$ journalctl -f -u keepalived
# 查看虚拟ip
$ ip a
keepalived.service处于active的状态
虚拟IP绑定到了ens1网卡上
可以ping通虚拟IP
5.3 部署第一个主节点
# 准备配置文件
$ cp target/configs/kubeadm-config.yaml ~
# ssh到第一个主节点,执行kubeadm初始化系统(注意保存最后打印的加入集群的命令)
$ kubeadm init --config=kubeadm-config.yaml --experimental-upload-certs
# copy kubectl配置(上一步会有提示)
$ mkdir -p ~/.kube
$ cp -i /etc/kubernetes/admin.conf ~/.kube/config
# 测试一下kubectl
$ kubectl get pods --all-namespaces
注意:需要将init打印的join命令保存下来,之后加入worker节点的时候需要使用到。
kubeadm join 192.168.3.199:6443 --token 8qn6rx.rnariugyxjsd642z \
--discovery-token-ca-cert-hash sha256:4fd89120cc158a17b9bb898a1cebb879dba88795904a4b9c7de8f66718effa9d \
--experimental-control-plane --certificate-key 1d18e633630307ac7d4b8ca7c8bec4dbb03fba7be7e6b6270a101fca4d21e651
坑4:kubeadm初始化系统,报错:版本不匹配
error execution phase preflight: [preflight] Some fatal errors occurred:
[ERROR KubeletVersion]: the kubelet version is higher than the control plane version. This is not a supported version skew and may lead to a malfunctional cluster. Kubelet version: "1.20.5" Control plane version: "1.14.0"
[preflight] If you know what you are doing, you can make a check non-fatal with `--ignore-preflight-errors=...`
解决方法:重新安装kubelet(奇怪的是之前安装的时候明明指定了kubelet的版本)
yum -y remove kubelet
yum -y install kubelet-1.14.0 kubeadm-1.14.0
dns是pending状态,其他都是running状态,因为dns是运行在工作节点上的。
使用curl命令请求一下健康检查接口,返回ok代表没问题
[root@jxd-master1 ~]# curl -k https://192.168.3.199:6443/healthz
ok
6 复制配置文件
其他节点需要用到的证书文件:/etc/kubernetes/admin.conf & /etc/kubernetes/pki
- ca.crt
- ca.key
- sa.key
- sa.pub
- front-proxy-ca.crt
- front-proxy-ca.key
- ca.crt
- ca.key
拷贝主节点的文件到其他节点:
scp -r root@jxd-master1:/etc/kubernetes/pki/ .
scp root@jxd-master1:/etc/kubernetes/admin.conf .
删除其他文件:
[root@jxd-master2 pki]# rm -f apiserver*
[root@jxd-master2 pki]# rm -f front-proxy-client.*
[root@jxd-master2 pki]# rm -f etcd/healthcheck-client.* etcd/peer.* etcd/server.*
7 部署第二个主节点
第二个主节点的配置脚本文件:/root/kubernetes-ha-kubeadm/target/scripts/init-master-second.sh
#!/bin/bash
# kubelet 引导配置
kubeadm alpha phase certs all --config kubeadm-config.yaml
kubeadm alpha phase kubelet config write-to-disk --config kubeadm-config.yaml
kubeadm alpha phase kubelet write-env-file --config kubeadm-config.yaml
kubeadm alpha phase kubeconfig kubelet --config kubeadm-config.yaml
systemctl start kubelet
sleep 2
# 加入etcd集群
export KUBECONFIG=/etc/kubernetes/admin.conf
kubectl exec -n kube-system etcd-jxd-master1 -- etcdctl --ca-file /etc/kubernetes/pki/etcd/ca.crt --cert-file /etc/kubernetes/pki/etcd/peer.crt --key-file /etc/kubernetes/pki/etcd/peer.key --endpoints=https://192.168.3.180:2379 member add jxd-master2 https://192.168.3.112:2380
sleep 2
kubeadm alpha phase etcd local --config kubeadm-config.yaml
sleep 3
# 部署主节点组件
kubeadm alpha phase kubeconfig all --config kubeadm-config.yaml
kubeadm alpha phase controlplane all --config kubeadm-config.yaml
kubeadm alpha phase mark-master --config kubeadm-config.yaml
# 上传生成的初始化脚本
$ scp target/scripts/init-master-second.sh root@jxd-master2:~
# 在第二个master节点执行初始化脚本
$ sh init-master-second.sh
# 查看节点运行情况
$ netstat -ntlp
$ docker ps
$ journalctl -f
# 配置kubectl
$ mkdir -p ~/.kube
$ mv /etc/kubernetes/admin.conf ~/.kube/config
8 部署网络插件
[root@jxd-master1 addons]# kubectl apply -f /etc/kubernetes/addons/calico-rbac-kdd.yaml
[root@jxd-master1 addons]# kubectl apply -f /etc/kubernetes/addons/calico.yaml
查看运行状态:
9 加入worker节点
# 使用之前保存的join命令加入集群
$ kubeadm join 192.168.3.199:6443 --token 8qn6rx.rnariugyxjsd642z \
--discovery-token-ca-cert-hash sha256:4fd89120cc158a17b9bb898a1cebb879dba88795904a4b9c7de8f66718effa9d \
--experimental-control-plane --certificate-key 1d18e633630307ac7d4b8ca7c8bec4dbb03fba7be7e6b6270a101fca4d21e651
# 耐心等待一会,并观察日志
$ journalctl -f
# 查看节点
$ kubectl get nodes
坑:the server doesn't have a resource type "nodes"
run:kubectl get nodes
ERROR:
the server doesn't have a resource type "nodes"
解决方法:
sudo cp /etc/kubernetes/admin.conf ~/.kube/config
坑:Unable to connect to the server: x509: certificate signed by unknown authority (possibly because of "crypto/rsa: verification error" while trying to verify candidate authority certificate "kubernetes") 解决方法:blog.csdn.net/woay2008/ar…
10 集群可用性测试
10.1 创建nginx ds
# 写入配置
$ cat > nginx-ds.yml <<EOF
apiVersion: v1
kind: Service
metadata:
name: nginx-ds
labels:
app: nginx-ds
spec:
type: NodePort
selector:
app: nginx-ds
ports:
- name: http
port: 80
targetPort: 80
---
apiVersion: extensions/v1beta1
kind: DaemonSet
metadata:
name: nginx-ds
labels:
addonmanager.kubernetes.io/mode: Reconcile
spec:
template:
metadata:
labels:
app: nginx-ds
spec:
containers:
- name: my-nginx
image: nginx:1.7.9
ports:
- containerPort: 80
EOF
# 创建ds
$ kubectl create -f nginx-ds.yml
10.2 检查连通性
# 检查各 Node 上的 Pod IP 连通性
$ kubectl get pods -o wide
# 在每个节点上ping pod ip
$ ping <pod-ip>
# 检查service可达性
$ kubectl get svc
# 在每个节点上访问服务
$ curl <service-ip>:<port>
# 在每个节点检查node-port可用性
$ curl <node-ip>:<port>
10.3 检查DNS可用性
# 创建一个nginx pod
$ cat > pod-nginx.yaml <<EOF
apiVersion: v1
kind: Pod
metadata:
name: nginx
spec:
containers:
- name: nginx
image: nginx:1.7.9
ports:
- containerPort: 80
EOF
# 创建pod
$ kubectl create -f pod-nginx.yaml
# 进入pod,查看dns
$ kubectl exec nginx -i -t -- /bin/bash
# 查看dns配置
root@nginx:/# cat /etc/resolv.conf
# 查看名字是否可以正确解析
root@nginx:/# ping nginx-ds