Ansible部署K8s高可用集群(小节6)

250 阅读3分钟

101Master1

移走或删除

mv /etc/ansible/* /tmp/

拷贝

cp -rf kubeasz/* /etc/ansible/
cd /etc/ansible/
#帮助文档
cat README.md

cp example/hosts.m-masters.example ./hosts

参考文档中部署步骤:github.com/easzlab/kubeasz

每个节点安装依赖工具:安装 python2.7

Master{101\102}-etcd{105\106\107}-node{110\111}

安装python2
apt-get install python2.7 -y

#软链接
ln -s /usr/bin/python2.7 /usr/bin/python

Master101

下载k8s: 可查看 /etc/ansible/down/download.sh、将其中的版本变量改成所需版本号即可

上传文件:k8s.1-13-5.tar.gz

cd /etc/ansible/bin
#解压缩
tar xvf k8s.1-13-5.tar.gz
#由于是嵌套关系、所以移动到当前目录下
mv bin/* .

修改文件(根据实际情况)

pwd
/etc/ansible

#修改文件(根据实际情况)
vim hosts
# 集群部署节点:一般为运行ansible 脚本的节点
# 变量 NTP_ENABLED (=yes/no) 设置集群是否安装 chrony 时间同步
[deploy]
192.168.37.101 NTP_ENABLED=no

# etcd集群请提供如下NODE_NAME,注意etcd集群必须是1,3,5,7...奇数个节点
[etcd]
192.168.37.105 NODE_NAME=etcd1
192.168.37.106 NODE_NAME=etcd2
192.168.37.107 NODE_NAME=etcd3

[new-etcd] # 预留组,后续添加etcd节点使用
#192.168.1.x NODE_NAME=etcdx

[kube-master]
192.168.37.101

[new-master] # 预留组,后续添加master节点使用
192.168.37.102

[kube-node]
192.168.37.110

[new-node] # 预留组,后续添加node节点使用
192.168.37.111

# 参数 NEW_INSTALL:yes表示新建,no表示使用已有harbor服务器
# 如果不使用域名,可以设置 HARBOR_DOMAIN=""
[harbor]
#192.168.1.8 HARBOR_DOMAIN="harbor.yourdomain.com" NEW_INSTALL=no

# 负载均衡(目前已支持多于2节点,一般2节点就够了) 安装 haproxy+keepalived
[lb]
192.168.1.1 LB_ROLE=backup
192.168.1.2 LB_ROLE=master

#【可选】外部负载均衡,用于自有环境负载转发 NodePort 暴露的服务等
[ex-lb]
#192.168.1.6 LB_ROLE=backup EX_VIP=192.168.1.250
#192.168.1.7 LB_ROLE=master EX_VIP=192.168.1.250

[all:vars]
# ---------集群主要参数---------------
#集群部署模式:allinone, single-master, multi-master
DEPLOY_MODE=multi-master

#集群主版本号,目前支持: v1.8, v1.9, v1.10,v1.11, v1.12, v1.13
K8S_VER="v1.13"

# 集群 MASTER IP即 LB节点VIP地址,为区别与默认apiserver端口,设置VIP监听的服务端口8443
# 公有云上请使用云负载均衡内网地址和监听端口
MASTER_IP="192.168.37.240"
KUBE_APISERVER="https://{{ MASTER_IP }}:6443"

# 集群网络插件,目前支持calico, flannel, kube-router, cilium
CLUSTER_NETWORK="calico"

# 服务网段 (Service CIDR),注意不要与内网已有网段冲突
SERVICE_CIDR="10.20.0.0/16"

# POD 网段 (Cluster CIDR),注意不要与内网已有网段冲突
CLUSTER_CIDR="172.20.0.0/16"

# 服务端口范围 (NodePort Range)
NODE_PORT_RANGE="30000-65000"

# kubernetes 服务 IP (预分配,一般是 SERVICE_CIDR 中第一个IP)
CLUSTER_KUBERNETES_SVC_IP="10.20.0.1"

# 集群 DNS 服务 IP (从 SERVICE_CIDR 中预分配)
CLUSTER_DNS_SVC_IP="10.20.254.254"

# 集群 DNS 域名
CLUSTER_DNS_DOMAIN="linux01.local."

# 集群basic auth 使用的用户名和密码
BASIC_AUTH_USER="admin"
BASIC_AUTH_PASS="123456"

# ---------附加参数--------------------
#默认二进制文件目录
bin_dir="/usr/bin"

#证书目录
ca_dir="/etc/kubernetes/ssl"

#部署目录,即 ansible 工作目录,建议不要修改
base_dir="/etc/ansible"

验证ansible 安装:ansible all -m ping 正常能看到节点返回 SUCCESS

ansible all -m ping
pwd
/etc/ansible

vim roles/prepare/templates/30-k8s-ulimits.conf.j2

* soft nofile 100000
* hard nofile 100000
* soft nproc 100000
* hard nproc 100000

注释掉、或删除

vim 01.prepare.yml
...
# prepare tasks for all nodes
- hosts:
  - kube-master
  - kube-node
  - deploy
  - etcd
#  - lb    <--注释此项

01-创建证书和安装准备

# 分步安装
## 1.在 /etc/kubernetes/ssl/ 可看一堆证书
ansible-playbook 01.prepare.yml

etcd下载地址:github.com/etcd-io/etcd/releases

解压软件:etcd-v3.3.27-linux-amd64.tar.gz后、上传文件中的文件、如下

cd /opt

#上传文件中的etcd*文件
ll etcd*
-rw-r--r--  1 root root 22480128 Oct 15  2021 etcd   <--
-rw-r--r--  1 root root 18045152 Oct 15  2021 etcdctl   <--

#添加权限
chmod a+x etcd*

#执行一下、可看到版本号等信息
./etcd --version
etcd Version: 3.3.27
Git SHA: 973882f69
Go Version: go1.12.17
Go OS/Arch: linux/amd64

#替换
mv etcd* /etc/ansible/bin/

#检查是否替换
/etc/ansible/bin/etcd --version
etcd Version: 3.3.27
Git SHA: 973882f69
Go Version: go1.12.17
Go OS/Arch: linux/amd64

02-安装etcd集群

cd /etc/ansible

## 2.部署etcd
ansible-playbook 02.etcd.yml

验证etcd是否正常、可在etcd{105/106/107}任意主机

如在 105

# ps -ef |grep etcd
# export NODE_IPS="192.168.37.105 192.168.37.106 192.168.37.107"
#返回3个'successfully'
# for ip in ${NODE_IPS};do  ETCDCTL_API=3 /usr/bin/etcdctl --endpoints=https://${ip}:2379 --cacert=/etc/kubernetes/ssl/ca.pem --cert=/etc/etcd/ssl/etcd.pem --key=/etc/etcd/ssl/etcd-key.pem endpoint health; done
https://192.168.37.105:2379 is healthy: successfully committed proposal: took = 6.280979ms
https://192.168.37.106:2379 is healthy: successfully committed proposal: took = 6.685073ms
https://192.168.37.107:2379 is healthy: successfully committed proposal: took = 6.405382ms

Master101(03.docker.yml手动装过、所以此处跳过)

04-安装master节点

./bin/kube-apiserver --version
Kubernetes v1.13.5   <--
# ansible-playbook 04.kube-master.yml

#查看状态
# kubectl get node
NAME             STATUS                     ROLES    AGE     VERSION
192.168.37.101   Ready,SchedulingDisabled   master   4m10s   v1.13.5

node{110\111}

安装docker脚本\执行脚本

bash /opt/docker-install.sh

能否下载镜像请在node110中测试、node111是在预留组中

docker pull harbor.123.com/linux01/alpine:v2

#查看镜像
docker images
REPOSITORY                      TAG                 IMAGE ID            CREATED             SIZE
harbor.123.com/linux01/alpine   v2                  b2aa39c304c2        3 weeks ago         7.05MB

Master101

05-安装node节点

ansible-playbook 05.kube-node.yml

添加了新的node节点

kubectl get node
NAME             STATUS                     ROLES    AGE   VERSION
192.168.37.101   Ready,SchedulingDisabled   master   15m   v1.13.5
192.168.37.110   Ready                      node     97s   v1.13.5

06-安装集群网络

cd /etc/ansible/
ansible-playbook 06.network.yml

calicoctl命令简介

检查整个calico网络的状态

# calicoctl node status
Calico process is running.

IPv4 BGP status
+----------------+-------------------+-------+----------+-------------+
|  PEER ADDRESS  |     PEER TYPE     | STATE |  SINCE   |    INFO     |
+----------------+-------------------+-------+----------+-------------+
| 192.168.37.110 | node-to-node mesh | up    | 05:36:00 | Established |
+----------------+-------------------+-------+----------+-------------+

IPv6 BGP status
No IPv6 peers found.
cp /usr/bin/docker* /etc/ansible/bin/
cp /usr/bin/containerd* /etc/ansible/bin/

添加node、已在'/etc/ansible/hosts'中添加

# kubectl get nodes
NAME             STATUS                     ROLES    AGE    VERSION
192.168.37.101   Ready,SchedulingDisabled   master   151m   v1.13.5
192.168.37.110   Ready                      node     137m   v1.13.5

改为本地镜像下载

图片.png

node110

网络插件

#拉取镜像
docker pull mirrorgooglecontainers/pause-amd64:3.1

#修改tag号
docker tag mirrorgooglecontainers/pause-amd64:3.1 harbor.123.com/baseimages/pause-amd64:3.1

#登录 账号:admin 密码:123456
docker login harbor.123.com

#上传
docker push harbor.123.com/baseimages/pause-amd64:3.1

Master101

vim ./roles/kube-node/defaults/main.yml

# 默认使用kube-proxy的 'iptables' 模式,可选 'ipvs' 模式(experimental)
PROXY_MODE: "iptables"

# 基础容器镜像
SANDBOX_IMAGE: "harbor.123.com/baseimages/pause-amd64:3.1"    <--改成内部地址
#SANDBOX_IMAGE: "mirrorgooglecontainers/pause-amd64:3.1"    <--注释掉
#SANDBOX_IMAGE: "registry.access.redhat.com/rhel7/pod-infrastructure:latest"

# Kubelet 根目录
KUBELET_ROOT_DIR: "/var/lib/kubelet"

# node节点最大pod 数
MAX_PODS: 110

修改配置

vim /etc/systemd/system/kubelet.service

  --max-pods=5000 \
#  --pod-infra-container-image=mirrorgooglecontainers/pause-amd64:3.1 \      <--注释掉
  --pod-infra-container-image=harbor.123.com/baseimages/pause-amd64:3.1 \    <--修改此行

重启服务

systemctl daemon-reload
systemctl restart kubelet

calico镜像下载地址:github.com/projectcalico/calico/releases

点击下载release-v3.4.4.tgz

上传后解压、打镜像

cd /opt
tar xvf release-v3.4.4.tgz
cd release-v3.4.4/images

#导入镜像1
docker load -i calico-cni.tar
#修改tag号
docker tag calico/cni:v3.4.4 harbor.123.com/baseimages/calico-cni:v3.4.4
#上传镜像
docker push harbor.123.com/baseimages/calico-cni:v3.4.4

#导入镜像2
docker load -i calico-node.tar
#修改tag号
docker tag calico/node:v3.4.4 harbor.123.com/baseimages/calico-node:v3.4.4
#上传镜像
docker push harbor.123.com/baseimages/calico-node:v3.4.4

#导入镜像3
docker load -i calico-kube-controllers.tar
#修改tag号
docker tag calico/kube-controllers:v3.4.4 harbor.123.com/baseimages/kube-controllers:v3.4.4
#上传镜像
docker push harbor.123.com/baseimages/kube-controllers:v3.4.4

此步骤不要出错、否则后期排错非常麻烦!!!

vim /etc/ansible/roles/calico/templates/calico-v3.4.yaml.j2

        - name: install-cni
        #注释掉下行后、添加新的
        #  image: calico/cni:{{ calico_ver }}
          image: harbor.123.com/baseimages/calico-cni:v3.4.4
          
        - name: calico-node
        #注释掉下行后、添加新的
        #  image: calico/node:{{ calico_ver }}
          image: harbor.123.com/baseimages/calico-node:v3.4.4
          
        - name: calico-kube-controllers
        #注释掉下行后、添加新的
        #  image: calico/kube-controllers:{{ calico_ver }}        
          image: harbor.123.com/baseimages/kube-controllers:v3.4.4

添加node

cd /etc/ansible/
#添加node
ansible-playbook 20.addnode.yml
kubectl get nodes
NAME             STATUS                     ROLES    AGE     VERSION
192.168.37.101   Ready,SchedulingDisabled   master   4h      v1.13.5
192.168.37.110   Ready                      node     3h46m   v1.13.5
192.168.37.111   Ready                      node     87m     v1.13.5

node111

查看是否下载变成内网

cat /etc/systemd/system/kubelet.service |grep image
  --pod-infra-container-image=harbor.123.com/baseimages/pause-amd64:3.1 \

Master101

添加Master、注释掉下面选项

vim 21.addmaster.yml 

# reconfigure and restart the haproxy service
#- hosts: lb    <--
#  roles:    <--
#  - lb    <--

添加Master

# ansible-playbook 21.addmaster.yml

# calicoctl node status
Calico process is running.

IPv4 BGP status
+----------------+-------------------+-------+----------+-------------+
|  PEER ADDRESS  |     PEER TYPE     | STATE |  SINCE   |    INFO     |
+----------------+-------------------+-------+----------+-------------+
| 192.168.37.110 | node-to-node mesh | up    | 16:45:41 | Established |
| 192.168.37.111 | node-to-node mesh | up    | 17:01:42 | Established |
| 192.168.37.102 | node-to-node mesh | up    | 17:15:32 | Established |
+----------------+-------------------+-------+----------+-------------+

IPv6 BGP status
No IPv6 peers found.

测试网络能否通信

#1.
# kubectl run net-test --image=alpine --replicas=4 sleep 360000
kubectl run --generator=deployment/apps.v1 is DEPRECATED and will be removed in a future version. Use kubectl run --generator=run-pod/v1 or kubectl create instead.
deployment.apps/net-test created

#2.
# kubectl get pod
NAME                       READY   STATUS              RESTARTS   AGE
net-test-cd766cb69-24lgk   0/1     ContainerCreating   0          12s
net-test-cd766cb69-46dnb   1/1     Running             0          12s
net-test-cd766cb69-mwwlt   1/1     Running             0          12s
net-test-cd766cb69-snv44   0/1     ContainerCreating   0          12s

#3.
# kubectl get pod -o wide
NAME                       READY   STATUS    RESTARTS   AGE   IP               NODE             NOMINATED NODE   READINESS GATES
net-test-cd766cb69-24lgk   1/1     Running   0          46s   172.20.104.1     192.168.37.111   <none>           <none>
net-test-cd766cb69-46dnb   1/1     Running   0          46s   172.20.166.129   192.168.37.110   <none>           <none>
net-test-cd766cb69-mwwlt   1/1     Running   0          46s   172.20.166.130   192.168.37.110   <none>           <none>
net-test-cd766cb69-snv44   1/1     Running   0          46s   172.20.104.2     192.168.37.111   <none>           <none>

#4.进到其中一个
# kubectl exec -it net-test-cd766cb69-24lgk sh

#5.查看IP
/ # ifconfig |grep 172.20.104.1
          inet addr:172.20.104.1  Bcast:0.0.0.0  Mask:255.255.255.255
          
#6.可以ping通其他主机
/ # ping 172.20.166.129 -c 2

#7.ping外网(域名ping不通、因为没做DNS解析)
/ # ping 8.8.8.8 -c 2