System configuration
1. Enable IP forwarding and bridge netfilter on all nodes
# Configure the kernel parameters Kubernetes needs on every node
cat <<EOF > /etc/sysctl.d/k8s.conf
net.bridge.bridge-nf-call-ip6tables = 1
net.bridge.bridge-nf-call-iptables = 1
net.ipv4.ip_forward = 1
EOF
# Apply the settings
sysctl --system
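The bridge-nf-call settings above only take effect when the br_netfilter kernel module is loaded; if sysctl --system reports that the keys do not exist, a minimal way (assuming a systemd host with /etc/modules-load.d) to load the module now and on every boot is:
# Load br_netfilter immediately and register it to load at boot
modprobe br_netfilter
echo br_netfilter > /etc/modules-load.d/k8s.conf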
2. Disable swap
swapoff -a && sysctl -w vm.swappiness=0
# Prevent the swap partition from being mounted again at boot
sed -i '/ swap / s/^\(.*\)$/#\1/g' /etc/fstab
# or, equivalently
sed -ri '/^[^#]*swap/s@^@#@' /etc/fstab
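To double-check that swap is really off:
# The Swap row should read all zeros
free -m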
3. Set SELinux to permissive mode on all nodes (this step only applies to distributions that ship SELinux, such as CentOS/RHEL; stock Ubuntu uses AppArmor and has no SELinux to configure)
# Make the change persistent: edit /etc/selinux/config and set
vim /etc/selinux/config
SELINUX=permissive
# Apply it for the current boot
setenforce 0
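On hosts that do have SELinux installed, a quick check of the current mode:
# Should print Permissive (the command does not exist on hosts without SELinux)
getenforce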
Install Docker
Install Docker on every machine; version 19.03.11 is recommended.
curl -fsSL https://get.docker.com | bash -s docker --mirror Aliyun
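The get.docker.com script installs the latest release. If you prefer to pin the recommended 19.03.11, one option is to install it straight from the Docker apt repository; the version string below is only an example for Ubuntu focal, take the real one from the apt-cache madison output on your system:
# List the versions available in the repository, then install a specific one
apt-cache madison docker-ce
apt-get install -y docker-ce=5:19.03.11~3-0~ubuntu-focal docker-ce-cli=5:19.03.11~3-0~ubuntu-focal containerd.io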
Change Docker's cgroup driver to systemd. When systemd is the init system, running a second cgroup manager (Docker's default cgroupfs) alongside it can make the node unstable under resource pressure, which is why the Kubernetes docs recommend the systemd driver for both Docker and the kubelet.
# Configure the Docker daemon
cat > /etc/docker/daemon.json <<EOF
{
  "exec-opts": ["native.cgroupdriver=systemd"],
  "log-driver": "json-file",
  "log-opts": {
    "max-size": "100m"
  },
  "storage-driver": "overlay2"
}
EOF
mkdir -p /etc/systemd/system/docker.service.d
# Restart Docker
systemctl daemon-reload
systemctl restart docker
# Enable Docker to start on boot
sudo systemctl enable docker
Verify the Docker version and the cgroup driver
wangxw@wangxw-virtual-machine:~/Desktop$ sudo docker version
Client: Docker Engine - Community
Version: 19.03.13
API version: 1.40
Go version: go1.13.15
Git commit: 4484c46d9d
Built: Wed Sep 16 17:02:52 2020
OS/Arch: linux/amd64
Experimental: false
wangxw@wangxw-virtual-machine:~/Desktop$ sudo docker info | grep Cgroup
Cgroup Driver: systemd
Install kubeadm, kubelet and kubectl
# The kubeadm, kubelet and kubectl versions must be compatible with each other and with the control plane; here all three are pinned to 1.18.0
sudo apt-get update && sudo apt-get install -y apt-transport-https curl
curl -s https://mirrors.aliyun.com/kubernetes/apt/doc/apt-key.gpg | sudo apt-key add -
cat <<EOF | sudo tee /etc/apt/sources.list.d/kubernetes.list
deb https://mirrors.aliyun.com/kubernetes/apt/ kubernetes-xenial main
EOF
sudo apt-get update
sudo apt-get install -y kubelet=1.18.0-00 kubeadm=1.18.0-00 kubectl=1.18.0-00
sudo apt-mark hold kubelet kubeadm kubectl
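A quick sanity check that the pinned versions actually landed:
# All three should report v1.18.0
kubeadm version -o short
kubelet --version
kubectl version --client --short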
Pre-pull the Kubernetes images
# Pull the images from the Aliyun mirror
kubeadm config images list |sed -e 's/^/docker pull /g' -e 's#k8s.gcr.io#registry.aliyuncs.com/google_containers#g'|sh -x
# Retag the images back to their k8s.gcr.io names
docker images |grep google_containers|awk '{print "docker tag ",$1":"$2,$1":"$2}' |sed -e 's#registry.aliyuncs.com/google_containers#k8s.gcr.io#2' |sh -x
# Remove the now-redundant Aliyun-tagged images
docker images | grep google_containers| awk '{print "docker rmi " $1":"$2}' | sh -x
# List the images
root@ubuntu:/home/root2/Desktop# docker images
REPOSITORY TAG IMAGE ID CREATED SIZE
quay.io/coreos/flannel v0.13.0-rc2 79dd6d6368e2 3 weeks ago 57.2MB
k8s.gcr.io/kube-proxy v1.18.0 43940c34f24f 6 months ago 117MB
k8s.gcr.io/kube-apiserver v1.18.0 74060cea7f70 6 months ago 173MB
k8s.gcr.io/kube-controller-manager v1.18.0 d3e55153f52f 6 months ago 162MB
k8s.gcr.io/kube-scheduler v1.18.0 a31f78c7c8ce 6 months ago 95.3MB
k8s.gcr.io/pause 3.2 80d28bedfe5d 8 months ago 683kB
k8s.gcr.io/coredns 1.6.7 67da37a9a360 8 months ago 43.8MB
k8s.gcr.io/etcd 3.4.3-0 303ce5db0e90 11 months ago 288MB
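As an alternative to the pull/retag/remove dance above, kubeadm can pull from the mirror directly; note that images fetched this way keep the registry.aliyuncs.com names, so the same --image-repository then has to be passed to kubeadm init (or set in a kubeadm config file):
# Alternative: let kubeadm pull straight from the Aliyun mirror
kubeadm config images pull --image-repository registry.aliyuncs.com/google_containers --kubernetes-version v1.18.0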
Initialize the master
kubeadm init <args>
root@ubuntu:/home/root2/Desktop# kubeadm init
W1013 12:57:51.437142 14578 version.go:102] could not fetch a Kubernetes version from the internet: unable to get URL "https://dl.k8s.io/release/stable-1.txt": Get https://storage.googleapis.com/kubernetes-release/release/stable-1.txt: net/http: request canceled while waiting for connection (Client.Timeout exceeded while awaiting headers)
W1013 12:57:51.437261 14578 version.go:103] falling back to the local client version: v1.18.0
W1013 12:57:51.437425 14578 configset.go:202] WARNING: kubeadm cannot validate component configs for API groups [kubelet.config.k8s.io kubeproxy.config.k8s.io]
[init] Using Kubernetes version: v1.18.0
[preflight] Running pre-flight checks
[preflight] Pulling images required for setting up a Kubernetes cluster
[preflight] This might take a minute or two, depending on the speed of your internet connection
[preflight] You can also perform this action in beforehand using 'kubeadm config images pull'
[kubelet-start] Writing kubelet environment file with flags to file "/var/lib/kubelet/kubeadm-flags.env"
[kubelet-start] Writing kubelet configuration to file "/var/lib/kubelet/config.yaml"
[kubelet-start] Starting the kubelet
[certs] Using certificateDir folder "/etc/kubernetes/pki"
[certs] Generating "ca" certificate and key
[certs] Generating "apiserver" certificate and key
[certs] apiserver serving cert is signed for DNS names [ubuntu kubernetes kubernetes.default kubernetes.default.svc kubernetes.default.svc.cluster.local] and IPs [10.96.0.1 192.168.2.131]
[certs] Generating "apiserver-kubelet-client" certificate and key
[certs] Generating "front-proxy-ca" certificate and key
[certs] Generating "front-proxy-client" certificate and key
[certs] Generating "etcd/ca" certificate and key
[certs] Generating "etcd/server" certificate and key
[certs] etcd/server serving cert is signed for DNS names [ubuntu localhost] and IPs [192.168.2.131 127.0.0.1 ::1]
[certs] Generating "etcd/peer" certificate and key
[certs] etcd/peer serving cert is signed for DNS names [ubuntu localhost] and IPs [192.168.2.131 127.0.0.1 ::1]
[certs] Generating "etcd/healthcheck-client" certificate and key
[certs] Generating "apiserver-etcd-client" certificate and key
[certs] Generating "sa" key and public key
[kubeconfig] Using kubeconfig folder "/etc/kubernetes"
[kubeconfig] Writing "admin.conf" kubeconfig file
[kubeconfig] Writing "kubelet.conf" kubeconfig file
[kubeconfig] Writing "controller-manager.conf" kubeconfig file
[kubeconfig] Writing "scheduler.conf" kubeconfig file
[control-plane] Using manifest folder "/etc/kubernetes/manifests"
[control-plane] Creating static Pod manifest for "kube-apiserver"
[control-plane] Creating static Pod manifest for "kube-controller-manager"
W1013 12:57:54.777363 14578 manifests.go:225] the default kube-apiserver authorization-mode is "Node,RBAC"; using "Node,RBAC"
[control-plane] Creating static Pod manifest for "kube-scheduler"
W1013 12:57:54.778385 14578 manifests.go:225] the default kube-apiserver authorization-mode is "Node,RBAC"; using "Node,RBAC"
[etcd] Creating static Pod manifest for local etcd in "/etc/kubernetes/manifests"
[wait-control-plane] Waiting for the kubelet to boot up the control plane as static Pods from directory "/etc/kubernetes/manifests". This can take up to 4m0s
[apiclient] All control plane components are healthy after 22.507181 seconds
[upload-config] Storing the configuration used in ConfigMap "kubeadm-config" in the "kube-system" Namespace
[kubelet] Creating a ConfigMap "kubelet-config-1.18" in namespace kube-system with the configuration for the kubelets in the cluster
[upload-certs] Skipping phase. Please see --upload-certs
[mark-control-plane] Marking the node ubuntu as control-plane by adding the label "node-role.kubernetes.io/master=''"
[mark-control-plane] Marking the node ubuntu as control-plane by adding the taints [node-role.kubernetes.io/master:NoSchedule]
[bootstrap-token] Using token: 348gbr.yotf8hroeq58asni
[bootstrap-token] Configuring bootstrap tokens, cluster-info ConfigMap, RBAC Roles
[bootstrap-token] configured RBAC rules to allow Node Bootstrap tokens to get nodes
[bootstrap-token] configured RBAC rules to allow Node Bootstrap tokens to post CSRs in order for nodes to get long term certificate credentials
[bootstrap-token] configured RBAC rules to allow the csrapprover controller automatically approve CSRs from a Node Bootstrap Token
[bootstrap-token] configured RBAC rules to allow certificate rotation for all node client certificates in the cluster
[bootstrap-token] Creating the "cluster-info" ConfigMap in the "kube-public" namespace
[kubelet-finalize] Updating "/etc/kubernetes/kubelet.conf" to point to a rotatable kubelet client certificate and key
[addons] Applied essential addon: CoreDNS
[addons] Applied essential addon: kube-proxy
Your Kubernetes control-plane has initialized successfully!
To start using your cluster, you need to run the following as a regular user:
mkdir -p $HOME/.kube
sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
sudo chown $(id -u):$(id -g) $HOME/.kube/config
You should now deploy a pod network to the cluster.
Run "kubectl apply -f [podnetwork].yaml" with one of the options listed at:
https://kubernetes.io/docs/concepts/cluster-administration/addons/
Then you can join any number of worker nodes by running the following on each as root:
kubeadm join 192.168.2.131:6443 --token 348gbr.yotf8hroeq58asni \
--discovery-token-ca-cert-hash sha256:a28ea4a7c45b4e2c634498a87dbebee4155476c9fa9c92cd294636e067d883a1
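The rest of this walkthrough runs kubectl as root; instead of copying admin.conf into ~/.kube as suggested above, root can simply point kubectl at the admin kubeconfig:
# Use the admin kubeconfig directly when working as root
export KUBECONFIG=/etc/kubernetes/admin.conf
kubectl get nodes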
View the certificates and keys
# Certificates generated by default; a custom certificate directory can be passed to kubeadm init with --cert-dir
root@node1-virtual-machine:# ls /etc/kubernetes/pki
apiserver.crt apiserver-etcd-client.key apiserver-kubelet-client.crt ca.crt etcd front-proxy-ca.key front-proxy-client.key sa.pub
apiserver-etcd-client.crt apiserver.key apiserver-kubelet-client.key ca.key front-proxy-ca.crt front-proxy-client.crt sa.key
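To see what one of these certificates actually contains (its SANs and validity period), plain openssl is enough; kubeadm 1.18 also offers an expiration summary via its alpha subcommand:
# Inspect the apiserver certificate's SANs and expiry dates
openssl x509 -in /etc/kubernetes/pki/apiserver.crt -noout -text | grep -A1 'Subject Alternative Name'
openssl x509 -in /etc/kubernetes/pki/apiserver.crt -noout -dates
# Summary of all certificate expirations
kubeadm alpha certs check-expiration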
Configure the cluster network plugin
# The master node shows NotReady because no pod network has been deployed yet
root@ubuntu:/home/root2/Desktop# kubectl get nodes
NAME STATUS ROLES AGE VERSION
ubuntu NotReady master 4m2s v1.18.0
Install the Flannel network plugin
wget https://raw.githubusercontent.com/coreos/flannel/master/Documentation/kube-flannel.yml
Flannel binds to the first network interface by default. If the host has more than one NIC, specify the interface explicitly in the containers section of kube-flannel.yml:
root@ubuntu:/home/root2/Desktop# vim kube-flannel.yml
containers:
- name: kube-flannel
  image: quay.io/coreos/flannel:v0.10.0-amd64
  command:
  - /opt/bin/flanneld
  args:
  - --ip-masq
  - --kube-subnet-mgr
  - --iface=ens33    # added: bind Flannel to this interface
Apply kube-flannel.yml
root@ubuntu:/home/root2/Desktop# kubectl apply -f kube-flannel.yml
Pitfall: the pod CIDR must be set for the CNI plugin to work
After a few minutes the Flannel pod has still not come up:
root@ubuntu:/home/root2/Desktop# kubectl get pod -n kube-system
NAME READY STATUS RESTARTS AGE
coredns-66bff467f8-4rm6p 0/1 ContainerCreating 0 7m
coredns-66bff467f8-swj2v 0/1 ContainerCreating 0 7m
etcd-ubuntu 1/1 Running 0 7m
kube-apiserver-ubuntu 1/1 Running 0 7m
kube-controller-manager-ubuntu 1/1 Running 0 7m
kube-flannel-ds-62l9t 0/1 CrashLoopBackOff 2 59s
kube-proxy-nbgvm 1/1 Running 0 7m
kube-scheduler-ubuntu 1/1 Running 0 7m
Check the logs of the pod kube-flannel-ds-62l9t
root@ubuntu:/home/root2/Desktop# kubectl logs kube-flannel-ds-62l9t -n kube-system
I1013 06:07:46.724199 1 main.go:531] Using interface with name ens33 and address 192.168.2.131
I1013 06:07:46.724298 1 main.go:548] Defaulting external address to interface address (192.168.2.131)
W1013 06:07:46.724320 1 client_config.go:517] Neither --kubeconfig nor --master was specified. Using the inClusterConfig. This might not work.
I1013 06:07:46.919480 1 kube.go:119] Waiting 10m0s for node controller to sync
I1013 06:07:46.919572 1 kube.go:306] Starting kube subnet manager
I1013 06:07:47.919837 1 kube.go:126] Node controller sync successful
I1013 06:07:47.919881 1 main.go:246] Created subnet manager: Kubernetes Subnet Manager - ubuntu
I1013 06:07:47.919889 1 main.go:249] Installing signal handlers
I1013 06:07:47.920071 1 main.go:390] Found network config - Backend type: vxlan
I1013 06:07:47.920281 1 vxlan.go:121] VXLAN config: VNI=1 Port=0 GBP=false Learning=false DirectRouting=false
E1013 06:07:47.920970 1 main.go:291] Error registering network: failed to acquire lease: node "ubuntu" pod cidr not assigned
I1013 06:07:47.921071 1 main.go:370] Stopping shutdownHandler...
The log shows that no pod CIDR has been assigned to node ubuntu:
Error registering network: failed to acquire lease: node "ubuntu" pod cidr not assigned
Checking the Flannel documentation, the pod subnet has to be allocated when the cluster is initialized, i.e. kubeadm init must be given a pod network CIDR (Flannel's manifest defaults to 10.244.0.0/16).
The pod CIDR can also be changed after the cluster has been initialized, but after modifying it that way I found that coredns on the master node would no longer start, so in the end I had to reset the cluster with kubeadm reset and run kubeadm init again.
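A minimal sketch of that redo, assuming Flannel's default subnet 10.244.0.0/16 (adjust it if you changed the Network field in kube-flannel.yml):
# Tear down the broken control plane, then initialize again with a pod CIDR
kubeadm reset -f
kubeadm init --pod-network-cidr=10.244.0.0/16
kubectl apply -f kube-flannel.yml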
Join the worker nodes to the cluster
After the master has been initialized, install kubeadm, kubectl, kubelet and Docker on each worker node following the same steps as above, then run
kubeadm join 192.168.2.131:6443 --token 348gbr.yotf8hroeq58asni \
--discovery-token-ca-cert-hash sha256:a28ea4a7c45b4e2c634498a87dbebee4155476c9fa9c92cd294636e067d883a1
to join the worker node to the cluster. If you have lost the join command, it can be regenerated on the master with
kubeadm token create --print-join-command
Verify that the cluster is healthy
root@ubuntu:/home/root2/Desktop# kubectl get node -o wide
NAME STATUS ROLES AGE VERSION INTERNAL-IP EXTERNAL-IP OS-IMAGE KERNEL-VERSION CONTAINER-RUNTIME
ubuntu Ready master 42h v1.18.0 192.168.2.131 <none> Ubuntu 20.04.1 LTS 5.4.0-48-generic docker://19.3.13
work1 Ready <none> 23h v1.18.0 192.168.2.132 <none> Ubuntu 20.04.1 LTS 5.4.0-42-generic docker://19.3.13
work2 Ready <none> 3h26m v1.18.0 192.168.2.133 <none> Ubuntu 20.04.1 LTS 5.4.0-51-generic docker://19.3.13
root@ubuntu:/home/root2/Desktop# kubectl get pod -n kube-system -o wide
NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES
coredns-66bff467f8-78lsr 1/1 Running 0 42h 10.244.0.3 ubuntu <none> <none>
coredns-66bff467f8-l6nts 1/1 Running 0 42h 10.244.0.2 ubuntu <none> <none>
etcd-ubuntu 1/1 Running 0 42h 192.168.2.131 ubuntu <none> <none>
kube-apiserver-ubuntu 1/1 Running 0 42h 192.168.2.131 ubuntu <none> <none>
kube-controller-manager-ubuntu 1/1 Running 0 42h 192.168.2.131 ubuntu <none> <none>
kube-flannel-ds-f9zpw 1/1 Running 2 3h27m 192.168.2.133 work2 <none> <none>
kube-flannel-ds-lvnzp 1/1 Running 0 42h 192.168.2.131 ubuntu <none> <none>
kube-flannel-ds-x9l7r 1/1 Running 0 23h 192.168.2.132 work1 <none> <none>
kube-proxy-9cfxb 1/1 Running 0 3h27m 192.168.2.133 work2 <none> <none>
kube-proxy-mf5fq 1/1 Running 0 42h 192.168.2.131 ubuntu <none> <none>
kube-proxy-t4n7h 1/1 Running 0 23h 192.168.2.132 work1 <none> <none>
kube-scheduler-ubuntu 1/1 Running 0 42h 192.168.2.131 ubuntu <none> <none>
An experimental Kubernetes cluster is now up and running.
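As an optional smoke test (not part of the original setup), you can schedule a workload and confirm it is reachable:
# Run an nginx Deployment and expose it on a NodePort
kubectl create deployment nginx --image=nginx
kubectl expose deployment nginx --port=80 --type=NodePort
kubectl get pod,svc -o wide
# curl any node's IP on the assigned NodePort to verify connectivity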
High-availability topology
To run Kubernetes in production, the cluster must be highly available, which above all means making the master (control-plane) nodes highly available. The official documentation describes two deployment topologies for this, stacked etcd and external etcd, for reference; due to hardware limitations I did not build one here.
Conclusion
Building this cluster has given us a first grasp of the Kubernetes architecture. In later posts we will dig deeper into the cluster's network topology, how Kubernetes networking is implemented, and more.