System configuration
1. Enable IP forwarding and bridge netfilter on all nodes
# Configure the kernel parameters Kubernetes needs on every node
cat <<EOF > /etc/sysctl.d/k8s.conf
net.bridge.bridge-nf-call-ip6tables = 1
net.bridge.bridge-nf-call-iptables = 1
net.ipv4.ip_forward = 1
EOF
# Apply the settings
sysctl --system
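The bridge-nf-call settings above only take effect when the br_netfilter kernel module is loaded; if sysctl --system reports that the keys do not exist, a minimal way (assuming a systemd host with /etc/modules-load.d) to load the module now and on every boot is:
# Load br_netfilter immediately and register it to load at boot
modprobe br_netfilter
echo br_netfilter > /etc/modules-load.d/k8s.conf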
2. Disable swap
swapoff -a && sysctl -w vm.swappiness=0
# Prevent the swap partition from being mounted again at boot
sed -i '/ swap / s/^\(.*\)$/#\1/g' /etc/fstab
# or, equivalently
sed -ri '/^[^#]*swap/s@^@#@' /etc/fstab
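To double-check that swap is really off:
# The Swap row should read all zeros
free -m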
3. Set SELinux to permissive mode on all nodes (this step only applies to distributions that ship SELinux, such as CentOS/RHEL; stock Ubuntu uses AppArmor and has no SELinux to configure)
# Make the change persistent: edit /etc/selinux/config and set
vim /etc/selinux/config
SELINUX=permissive
# Apply it for the current boot
setenforce 0
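On hosts that do have SELinux installed, a quick check of the current mode:
# Should print Permissive (the command does not exist on hosts without SELinux)
getenforce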
Install Docker
Install Docker on every machine; version 19.03.11 is recommended.
curl -fsSL https://get.docker.com | bash -s docker --mirror Aliyun
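The get.docker.com script installs the latest release. If you prefer to pin the recommended 19.03.11, one option is to install it straight from the Docker apt repository; the version string below is only an example for Ubuntu focal, take the real one from the apt-cache madison output on your system:
# List the versions available in the repository, then install a specific one
apt-cache madison docker-ce
apt-get install -y docker-ce=5:19.03.11~3-0~ubuntu-focal docker-ce-cli=5:19.03.11~3-0~ubuntu-focal containerd.io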
Change Docker's cgroup driver to systemd. When systemd is the init system, running a second cgroup manager (Docker's default cgroupfs) alongside it can make the node unstable under resource pressure, which is why the Kubernetes docs recommend the systemd driver for both Docker and the kubelet.
# Configure the Docker daemon
cat > /etc/docker/daemon.json <<EOF
{
  "exec-opts": ["native.cgroupdriver=systemd"],
  "log-driver": "json-file",
  "log-opts": {
    "max-size": "100m"
  },
  "storage-driver": "overlay2"
}
EOF
mkdir -p /etc/systemd/system/docker.service.d
# Restart Docker
systemctl daemon-reload
systemctl restart docker
# Enable Docker to start on boot
sudo systemctl enable docker
Verify the Docker version and the cgroup driver
wangxw@wangxw-virtual-machine:~/Desktop$ sudo docker version
Client: Docker Engine - Community
Version: 19.03.13
API version: 1.40
Go version: go1.13.15
Git commit: 4484c46d9d
Built: Wed Sep 16 17:02:52 2020
OS/Arch: linux/amd64
Experimental: false
wangxw@wangxw-virtual-machine:~/Desktop$ sudo docker info | grep Cgroup
Cgroup Driver: systemd
Install kubeadm, kubelet and kubectl
# The kubeadm, kubelet and kubectl versions must be compatible with each other and with the control plane; here all three are pinned to 1.18.0
sudo apt-get update && sudo apt-get install -y apt-transport-https curl
curl -s https://mirrors.aliyun.com/kubernetes/apt/doc/apt-key.gpg | sudo apt-key add -
cat <<EOF | sudo tee /etc/apt/sources.list.d/kubernetes.list
deb https://mirrors.aliyun.com/kubernetes/apt/ kubernetes-xenial main
EOF
sudo apt-get update
sudo apt-get install -y kubelet=1.18.0-00 kubeadm=1.18.0-00 kubectl=1.18.0-00
sudo apt-mark hold kubelet kubeadm kubectl
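A quick sanity check that the pinned versions actually landed:
# All three should report v1.18.0
kubeadm version -o short
kubelet --version
kubectl version --client --short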
Pre-pull the Kubernetes images
# Pull the images from the Aliyun mirror
kubeadm config images list |sed -e 's/^/docker pull /g' -e 's#k8s.gcr.io#registry.aliyuncs.com/google_containers#g'|sh -x
# Retag the images back to their k8s.gcr.io names
docker images |grep google_containers|awk '{print "docker tag ",$1":"$2,$1":"$2}' |sed -e 's#registry.aliyuncs.com/google_containers#k8s.gcr.io#2' |sh -x
# Remove the now-redundant Aliyun-tagged images
docker images | grep google_containers| awk '{print "docker rmi " $1":"$2}' | sh -x
# List the images
root@ubuntu:/home/root2/Desktop# docker images
REPOSITORY TAG IMAGE ID CREATED SIZE
quay.io/coreos/flannel v0.13.0-rc2 79dd6d6368e2 3 weeks ago 57.2MB
k8s.gcr.io/kube-proxy v1.18.0 43940c34f24f 6 months ago 117MB
k8s.gcr.io/kube-apiserver v1.18.0 74060cea7f70 6 months ago 173MB
k8s.gcr.io/kube-controller-manager v1.18.0 d3e55153f52f 6 months ago 162MB
k8s.gcr.io/kube-scheduler v1.18.0 a31f78c7c8ce 6 months ago 95.3MB
k8s.gcr.io/pause 3.2 80d28bedfe5d 8 months ago 683kB
k8s.gcr.io/coredns 1.6.7 67da37a9a360 8 months ago 43.8MB
k8s.gcr.io/etcd 3.4.3-0 303ce5db0e90 11 months ago 288MB
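As an alternative to the pull/retag/remove dance above, kubeadm can pull from the mirror directly; note that images fetched this way keep the registry.aliyuncs.com names, so the same --image-repository then has to be passed to kubeadm init (or set in a kubeadm config file):
# Alternative: let kubeadm pull straight from the Aliyun mirror
kubeadm config images pull --image-repository registry.aliyuncs.com/google_containers --kubernetes-version v1.18.0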
Initialize the master
kubeadm init <args>
root@ubuntu:/home/root2/Desktop# kubeadm init
W1013 12:57:51.437142 14578 version.go:102] could not fetch a Kubernetes version from the internet: unable to get URL "https://dl.k8s.io/release/stable-1.txt": Get https://storage.googleapis.com/kubernetes-release/release/stable-1.txt: net/http: request canceled while waiting for connection (Client.Timeout exceeded while awaiting headers)
W1013 12:57:51.437261 14578 version.go:103] falling back to the local client version: v1.18.0
W1013 12:57:51.437425 14578 configset.go:202] WARNING: kubeadm cannot validate component configs for API groups [kubelet.config.k8s.io kubeproxy.config.k8s.io]
[init] Using Kubernetes version: v1.18.0
[preflight] Running pre-flight checks
[preflight] Pulling images required for setting up a Kubernetes cluster
[preflight] This might take a minute or two, depending on the speed of your internet connection
[preflight] You can also perform this action in beforehand using 'kubeadm config images pull'
[kubelet-start] Writing kubelet environment file with flags to file "/var/lib/kubelet/kubeadm-flags.env"
[kubelet-start] Writing kubelet configuration to file "/var/lib/kubelet/config.yaml"
[kubelet-start] Starting the kubelet
[certs] Using certificateDir folder "/etc/kubernetes/pki"
[certs] Generating "ca" certificate and key
[certs] Generating "apiserver" certificate and key
[certs] apiserver serving cert is signed for DNS names [ubuntu kubernetes kubernetes.default kubernetes.default.svc kubernetes.default.svc.cluster.local] and IPs [10.96.0.1 192.168.2.131]
[certs] Generating "apiserver-kubelet-client" certificate and key
[certs] Generating "front-proxy-ca" certificate and key
[certs] Generating "front-proxy-client" certificate and key
[certs] Generating "etcd/ca" certificate and key
[certs] Generating "etcd/server" certificate and key
[certs] etcd/server serving cert is signed for DNS names [ubuntu localhost] and IPs [192.168.2.131 127.0.0.1 ::1]
[certs] Generating "etcd/peer" certificate and key
[certs] etcd/peer serving cert is signed for DNS names [ubuntu localhost] and IPs [192.168.2.131 127.0.0.1 ::1]
[certs] Generating "etcd/healthcheck-client" certificate and key
[certs] Generating "apiserver-etcd-client" certificate and key
[certs] Generating "sa" key and public key
[kubeconfig] Using kubeconfig folder "/etc/kubernetes"
[kubeconfig] Writing "admin.conf" kubeconfig file
[kubeconfig] Writing "kubelet.conf" kubeconfig file
[kubeconfig] Writing "controller-manager.conf" kubeconfig file
[kubeconfig] Writing "scheduler.conf" kubeconfig file
[control-plane] Using manifest folder "/etc/kubernetes/manifests"
[control-plane] Creating static Pod manifest for "kube-apiserver"
[control-plane] Creating static Pod manifest for "kube-controller-manager"
W1013 12:57:54.777363 14578 manifests.go:225] the default kube-apiserver authorization-mode is "Node,RBAC"; using "Node,RBAC"
[control-plane] Creating static Pod manifest for "kube-scheduler"
W1013 12:57:54.778385 14578 manifests.go:225] the default kube-apiserver authorization-mode is "Node,RBAC"; using "Node,RBAC"
[etcd] Creating static Pod manifest for local etcd in "/etc/kubernetes/manifests"
[wait-control-plane] Waiting for the kubelet to boot up the control plane as static Pods from directory "/etc/kubernetes/manifests". This can take up to 4m0s
[apiclient] All control plane components are healthy after 22.507181 seconds
[upload-config] Storing the configuration used in ConfigMap "kubeadm-config" in the "kube-system" Namespace
[kubelet] Creating a ConfigMap "kubelet-config-1.18" in namespace kube-system with the configuration for the kubelets in the cluster
[upload-certs] Skipping phase. Please see --upload-certs
[mark-control-plane] Marking the node ubuntu as control-plane by adding the label "node-role.kubernetes.io/master=''"
[mark-control-plane] Marking the node ubuntu as control-plane by adding the taints [node-role.kubernetes.io/master:NoSchedule]
[bootstrap-token] Using token: 348gbr.yotf8hroeq58asni
[bootstrap-token] Configuring bootstrap tokens, cluster-info ConfigMap, RBAC Roles
[bootstrap-token] configured RBAC rules to allow Node Bootstrap tokens to get nodes
[bootstrap-token] configured RBAC rules to allow Node Bootstrap tokens to post CSRs in order for nodes to get long term certificate credentials
[bootstrap-token] configured RBAC rules to allow the csrapprover controller automatically approve CSRs from a Node Bootstrap Token
[bootstrap-token] configured RBAC rules to allow certificate rotation for all node client certificates in the cluster
[bootstrap-token] Creating the "cluster-info" ConfigMap in the "kube-public" namespace
[kubelet-finalize] Updating "/etc/kubernetes/kubelet.conf" to point to a rotatable kubelet client certificate and key
[addons] Applied essential addon: CoreDNS
[addons] Applied essential addon: kube-proxy
Your Kubernetes control-plane has initialized successfully!
To start using your cluster, you need to run the following as a regular user:
mkdir -p $HOME/.kube
sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
sudo chown $(id -u):$(id -g) $HOME/.kube/config
You should now deploy a pod network to the cluster.
Run "kubectl apply -f [podnetwork].yaml" with one of the options listed at:
https://kubernetes.io/docs/concepts/cluster-administration/addons/
Then you can join any number of worker nodes by running the following on each as root:
kubeadm join 192.168.2.131:6443 --token 348gbr.yotf8hroeq58asni \
--discovery-token-ca-cert-hash sha256:a28ea4a7c45b4e2c634498a87dbebee4155476c9fa9c92cd294636e067d883a1
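The rest of this walkthrough runs kubectl as root; instead of copying admin.conf into ~/.kube as suggested above, root can simply point kubectl at the admin kubeconfig:
# Use the admin kubeconfig directly when working as root
export KUBECONFIG=/etc/kubernetes/admin.conf
kubectl get nodes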
View the certificates and keys
# Certificates generated by default; a custom certificate directory can be passed to kubeadm init with --cert-dir
root@node1-virtual-machine:# ls /etc/kubernetes/pki
apiserver.crt apiserver-etcd-client.key apiserver-kubelet-client.crt ca.crt etcd front-proxy-ca.key front-proxy-client.key sa.pub
apiserver-etcd-client.crt apiserver.key apiserver-kubelet-client.key ca.key front-proxy-ca.crt front-proxy-client.crt sa.key
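To see what one of these certificates actually contains (its SANs and validity period), plain openssl is enough; kubeadm 1.18 also offers an expiration summary via its alpha subcommand:
# Inspect the apiserver certificate's SANs and expiry dates
openssl x509 -in /etc/kubernetes/pki/apiserver.crt -noout -text | grep -A1 'Subject Alternative Name'
openssl x509 -in /etc/kubernetes/pki/apiserver.crt -noout -dates
# Summary of all certificate expirations
kubeadm alpha certs check-expiration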
Configure the cluster network plugin
# The master node shows NotReady because no pod network has been deployed yet
root@ubuntu:/home/root2/Desktop# kubectl get nodes
NAME STATUS ROLES AGE VERSION
ubuntu NotReady master 4m2s v1.18.0
Install the Flannel network plugin
wget https://raw.githubusercontent.com/coreos/flannel/master/Documentation/kube-flannel.yml
Flannel binds to the first network interface by default. If the host has more than one NIC, specify the interface explicitly in the containers section of kube-flannel.yml:
root@ubuntu:/home/root2/Desktop# vim kube-flannel.yml
containers:
- name: kube-flannel
  image: quay.io/coreos/flannel:v0.10.0-amd64
  command:
  - /opt/bin/flanneld
  args:
  - --ip-masq
  - --kube-subnet-mgr
  - --iface=ens33    # added: bind Flannel to this interface
Apply kube-flannel.yml
root@ubuntu:/home/root2/Desktop# kubectl apply -f kube-flannel.yml
Pitfall: the pod CIDR must be set for the CNI plugin to work
After a few minutes the Flannel pod has still not come up:
root@ubuntu:/home/root2/Desktop# kubectl get pod -n kube-system
NAME READY STATUS RESTARTS AGE
coredns-66bff467f8-4rm6p 0/1 ContainerCreating 0 7m
coredns-66bff467f8-swj2v 0/1 ContainerCreating 0 7m
etcd-ubuntu 1/1 Running 0 7m
kube-apiserver-ubuntu 1/1 Running 0 7m
kube-controller-manager-ubuntu 1/1 Running 0 7m
kube-flannel-ds-62l9t 0/1 CrashLoopBackOff 2 59s
kube-proxy-nbgvm 1/1 Running 0 7m
kube-scheduler-ubuntu 1/1 Running 0 7m
Check the logs of the pod kube-flannel-ds-62l9t
root@ubuntu:/home/root2/Desktop# kubectl logs kube-flannel-ds-62l9t -n kube-system
I1013 06:07:46.724199 1 main.go:531] Using interface with name ens33 and address 192.168.2.131
I1013 06:07:46.724298 1 main.go:548] Defaulting external address to interface address (192.168.2.131)
W1013 06:07:46.724320 1 client_config.go:517] Neither --kubeconfig nor --master was specified. Using the inClusterConfig. This might not work.
I1013 06:07:46.919480 1 kube.go:119] Waiting 10m0s for node controller to sync
I1013 06:07:46.919572 1 kube.go:306] Starting kube subnet manager
I1013 06:07:47.919837 1 kube.go:126] Node controller sync successful
I1013 06:07:47.919881 1 main.go:246] Created subnet manager: Kubernetes Subnet Manager - ubuntu
I1013 06:07:47.919889 1 main.go:249] Installing signal handlers
I1013 06:07:47.920071 1 main.go:390] Found network config - Backend type: vxlan
I1013 06:07:47.920281 1 vxlan.go:121] VXLAN config: VNI=1 Port=0 GBP=false Learning=false DirectRouting=false
E1013 06:07:47.920970 1 main.go:291] Error registering network: failed to acquire lease: node "ubuntu" pod cidr not assigned
I1013 06:07:47.921071 1 main.go:370] Stopping shutdownHandler...
The log shows that no pod CIDR has been assigned to node ubuntu:
Error registering network: failed to acquire lease: node "ubuntu" pod cidr not assigned
Checking the Flannel documentation, the pod subnet has to be allocated when the cluster is initialized, i.e. kubeadm init must be given a pod network CIDR (Flannel's manifest defaults to 10.244.0.0/16).
The pod CIDR can also be changed after the cluster has been initialized, but after modifying it that way I found that coredns on the master node would no longer start, so in the end I had to reset the cluster with kubeadm reset and run kubeadm init again.
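A minimal sketch of that redo, assuming Flannel's default subnet 10.244.0.0/16 (adjust it if you changed the Network field in kube-flannel.yml):
# Tear down the broken control plane, then initialize again with a pod CIDR
kubeadm reset -f
kubeadm init --pod-network-cidr=10.244.0.0/16
kubectl apply -f kube-flannel.yml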
Join the worker nodes to the cluster
After the master has been initialized, install kubeadm, kubectl, kubelet and Docker on each worker node following the same steps as above, then run
kubeadm join 192.168.2.131:6443 --token 348gbr.yotf8hroeq58asni \
--discovery-token-ca-cert-hash sha256:a28ea4a7c45b4e2c634498a87dbebee4155476c9fa9c92cd294636e067d883a1
to join the worker node to the cluster. If you have lost the join command, it can be regenerated on the master with
kubeadm token create --print-join-command
Verify that the cluster is healthy
root@ubuntu:/home/root2/Desktop# kubectl get node -o wide
NAME STATUS ROLES AGE VERSION INTERNAL-IP EXTERNAL-IP OS-IMAGE KERNEL-VERSION CONTAINER-RUNTIME
ubuntu Ready master 42h v1.18.0 192.168.2.131 <none> Ubuntu 20.04.1 LTS 5.4.0-48-generic docker://19.3.13
work1 Ready <none> 23h v1.18.0 192.168.2.132 <none> Ubuntu 20.04.1 LTS 5.4.0-42-generic docker://19.3.13
work2 Ready <none> 3h26m v1.18.0 192.168.2.133 <none> Ubuntu 20.04.1 LTS 5.4.0-51-generic docker://19.3.13
root@ubuntu:/home/root2/Desktop# kubectl get pod -n kube-system -o wide
NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES
coredns-66bff467f8-78lsr 1/1 Running 0 42h 10.244.0.3 ubuntu <none> <none>
coredns-66bff467f8-l6nts 1/1 Running 0 42h 10.244.0.2 ubuntu <none> <none>
etcd-ubuntu 1/1 Running 0 42h 192.168.2.131 ubuntu <none> <none>
kube-apiserver-ubuntu 1/1 Running 0 42h 192.168.2.131 ubuntu <none> <none>
kube-controller-manager-ubuntu 1/1 Running 0 42h 192.168.2.131 ubuntu <none> <none>
kube-flannel-ds-f9zpw 1/1 Running 2 3h27m 192.168.2.133 work2 <none> <none>
kube-flannel-ds-lvnzp 1/1 Running 0 42h 192.168.2.131 ubuntu <none> <none>
kube-flannel-ds-x9l7r 1/1 Running 0 23h 192.168.2.132 work1 <none> <none>
kube-proxy-9cfxb 1/1 Running 0 3h27m 192.168.2.133 work2 <none> <none>
kube-proxy-mf5fq 1/1 Running 0 42h 192.168.2.131 ubuntu <none> <none>
kube-proxy-t4n7h 1/1 Running 0 23h 192.168.2.132 work1 <none> <none>
kube-scheduler-ubuntu 1/1 Running 0 42h 192.168.2.131 ubuntu <none> <none>
An experimental Kubernetes cluster is now up and running.
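As an optional smoke test (not part of the original setup), you can schedule a workload and confirm it is reachable:
# Run an nginx Deployment and expose it on a NodePort
kubectl create deployment nginx --image=nginx
kubectl expose deployment nginx --port=80 --type=NodePort
kubectl get pod,svc -o wide
# curl any node's IP on the assigned NodePort to verify connectivity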
High-availability topology
To run Kubernetes in production, the cluster must be highly available, which above all means making the master (control-plane) nodes highly available. The official documentation describes two deployment topologies for this, stacked etcd and external etcd, for reference; due to hardware limitations I did not build one here.
Conclusion
Building this cluster has given us a first grasp of the Kubernetes architecture. In later posts we will dig deeper into the cluster's network topology, how Kubernetes networking is implemented, and more.