实验环境
- CentOS 7.6.1810
[root@localhost ~]# cat /etc/redhat-release
CentOS Linux release 7.6.1810 (Core)
- docker
#docker version
18.09.0
- k8s
#kubectl version
1.15.3
更新物理机设置
- 修改主机的名字
vim /etc/hostname
#输入以下内容
node2
- 修改域名映射
vim /etc/hosts
#增加如下内容:
10.2.16.150 node1
10.2.16.151 master
10.2.16.152 node2
- 更新物理机软件源地址
cp /etc/yum.repos.d/CentOS-Base.repo /etc/yum.repos.d/CentOS-Base.repo.backup
#不同版本的操作系统源地址不同
wget -O /etc/yum.repos.d/Centos-Base-aliyun.repo http://mirrors.aliyun.com/repo/Centos-7.repo
如果无法解析域名,则需要配置DNS服务器:
# 显示当前网络连接
nmcli connection show
# 修改当前网络连接对应的DNS服务器,这里的网络连接可以用名称或者UUID来标识(virbr0为对外网卡)
nmcli con mod virbr0 ipv4.dns "180.76.76.76 8.8.8.8"
# 使dns配置生效
nmcli con up virbr0
# 查看/etc/resolv.conf配置也已经更新
cat /etc/resolv.conf
- 增加docker的国内下载地址
yum -y install yum-utils
yum-config-manager --add-repo http://mirrors.aliyun.com/docker-ce/linux/centos/docker-ce.repo
# 关闭测试版本
yum-config-manager --disable docker-ce-edge
yum-config-manager --disable docker-ce-test
- 增加kubernetes的国内下载地址
cat <<EOF > /etc/yum.repos.d/kubernetes.repo
[kubernetes]
name=Kubernetes
baseurl=http://mirrors.aliyun.com/kubernetes/yum/repos/kubernetes-el7-x86_64
enabled=1
gpgcheck=0
repo_gpgcheck=0
gpgkey=http://mirrors.aliyun.com/kubernetes/yum/doc/yum-key.gpg
       http://mirrors.aliyun.com/kubernetes/yum/doc/rpm-package-key.gpg
EOF
- 更新yum包的索引
yum clean all && yum makecache fast
yum -y update
禁用系统一些设置
- 永久关闭swap
swapoff -a
vim /etc/fstab
#注释掉SWAP分区项,即可
# swap was on /dev/sda11 during installation
#UUID=0a55fdb5-a9d8-4215-80f7-f42f75644f69 none swap sw 0 0
#刷新swap使之生效
sysctl -p
- 临时关闭selinux
setenforce 0
- 永久关闭防火墙;不关闭需要打开一些端口
systemctl disable firewalld
systemctl stop firewalld
如果是不关闭,则需要打开以下端口
firewall-cmd --zone=public --add-port=80/tcp --permanent
firewall-cmd --zone=public --add-port=6443/tcp --permanent
firewall-cmd --zone=public --add-port=2379-2380/tcp --permanent
firewall-cmd --zone=public --add-port=10250-10255/tcp --permanent
firewall-cmd --zone=public --add-port=30000-32767/tcp --permanent
firewall-cmd --reload
firewall-cmd --zone=public --list-ports
安装docker
yum list docker-ce --showduplicates|sort -r
# 安装指定版本18.09.0,最新的19版k8s暂不支持
systemctl stop docker && systemctl disable docker
yum remove docker-ce docker-ce-cli
yum install -y docker-ce-18.09.0-3.el7 docker-ce-cli-18.09.0-3.el7
systemctl start docker && systemctl enable docker
执行docker version会显示docker client和server的版本。client和server的版本不一致不影响使用。设置镜像加速器(registry-mirrors):
touch /etc/docker/daemon.json
vim /etc/docker/daemon.json
{
"registry-mirrors": ["https://r0p1k0cb.mirror.aliyuncs.com"]
}
systemctl daemon-reload
systemctl restart docker
删除docker的命令为
yum list installed | grep docker
#卸载docker server和client
yum remove docker-ce docker-ce-cli
#删除镜像
rm -rf /var/lib/docker
安装k8s
# kubelet k8s的核心服务
# kubectl k8s集群的命令行客户端
# kubeadm k8s的配置工具
# kubernetes-cni 容器网络接口标准协议
yum install -y kubelet-1.15.3 kubeadm-1.15.3 kubectl-1.15.3 kubernetes-cni-0.7.5 --disableexcludes=kubernetes
systemctl daemon-reload
systemctl start kubelet && systemctl enable kubelet
kubectl version 会出现client version
[root@node2 ~]# kubectl version
Client Version: version.Info{Major:"1", Minor:"15", GitVersion:"v1.15.3", GitCommit:"2d3c76f9091b6bec110a5e63777c332469e0cba2", GitTreeState:"clean", BuildDate:"2019-08-19T11:13:54Z", GoVersion:"go1.12.9", Compiler:"gc", Platform:"linux/amd64"}
The connection to the server localhost:8080 was refused - did you specify the right host or port?
如果曾经安装过k8s,没卸载干净会出现
[root@node-buaa-151-152 ~]# kubectl version
Client Version: version.Info{Major:"1", Minor:"15", GitVersion:"v1.15.3", GitCommit:"2d3c76f9091b6bec110a5e63777c332469e0cba2", GitTreeState:"clean", BuildDate:"2019-08-19T11:13:54Z", GoVersion:"go1.12.9", Compiler:"gc", Platform:"linux/amd64"}
Unable to connect to the server: x509: certificate signed by unknown authority (possibly because of "crypto/rsa: verification error" while trying to verify candidate authority certificate "kubernetes")
解决方法为:
rm -r $HOME/.kube
systemctl restart kubelet
配置流量转发
setenforce 0
cat <<EOF > /etc/sysctl.d/k8s.conf
net.bridge.bridge-nf-call-ip6tables = 1
net.bridge.bridge-nf-call-iptables = 1
vm.swappiness = 0
EOF
modprobe br_netfilter
sysctl -p /etc/sysctl.d/k8s.conf
sysctl --system
systemctl daemon-reload
systemctl restart kubelet
K8S Master节点配置
- 生成kubeadm.conf 该配置文件是主节点的组件的配置汇总文件
# 修改了hostname的话需要重新生成
kubeadm config print init-defaults ClusterConfiguration > kubeadm.conf
- 修改localAPIEndpoint中的地址(Master的地址)为
10.2.16.151
- 修改imageRepository
获取国内镜像下载地址的方法
registry.cn-hangzhou.aliyuncs.com/google_containers
registry.cn-beijing.aliyuncs.com/escience/escience-beijing
- 修改kubernetesVersion为
1.15.3
- 配置子网网络
networking:
dnsDomain: cluster.local
podSubnet: 10.244.0.0/16
serviceSubnet: 10.96.0.0/12
这里的podSubnet和serviceSubnet分别是pod和service的子网网段,后续配置flannel网络时需要用到。
- 拉取k8s必要模块镜像
[root@master-buaa-151 root]# kubeadm config images list --config kubeadm.conf
registry.cn-beijing.aliyuncs.com/escience/escience-beijing/kube-apiserver:v1.15.3
registry.cn-beijing.aliyuncs.com/escience/escience-beijing/kube-controller-manager:v1.15.3
registry.cn-beijing.aliyuncs.com/escience/escience-beijing/kube-scheduler:v1.15.3
registry.cn-beijing.aliyuncs.com/escience/escience-beijing/kube-proxy:v1.15.3
registry.cn-beijing.aliyuncs.com/escience/escience-beijing/pause:3.1
registry.cn-beijing.aliyuncs.com/escience/escience-beijing/etcd:3.3.10
registry.cn-beijing.aliyuncs.com/escience/escience-beijing/coredns:1.3.1
下载全部当前版本的k8s所关联的镜像
kubeadm config images pull --config ./kubeadm.conf
- 初始化Master节点
kubeadm init --config ./kubeadm.conf
保留一个输出,供后续查看
[root@master-buaa-151 root]# kubeadm init --config ./kubeadm.conf
[init] Using Kubernetes version: v1.15.3
[preflight] Running pre-flight checks
[WARNING IsDockerSystemdCheck]: detected "cgroupfs" as the Docker cgroup driver. The recommended driver is "systemd". Please follow the guide at https://kubernetes.io/docs/setup/cri/
[WARNING Hostname]: hostname "master-buaa-151" could not be reached
[WARNING Hostname]: hostname "master-buaa-151": lookup master-buaa-151 on 114.114.114.114:53: no such host
[preflight] Pulling images required for setting up a Kubernetes cluster
[preflight] This might take a minute or two, depending on the speed of your internet connection
[preflight] You can also perform this action in beforehand using 'kubeadm config images pull'
[kubelet-start] Writing kubelet environment file with flags to file "/var/lib/kubelet/kubeadm-flags.env"
[kubelet-start] Writing kubelet configuration to file "/var/lib/kubelet/config.yaml"
[kubelet-start] Activating the kubelet service
[certs] Using certificateDir folder "/etc/kubernetes/pki"
[certs] Generating "etcd/ca" certificate and key
[certs] Generating "etcd/healthcheck-client" certificate and key
[certs] Generating "apiserver-etcd-client" certificate and key
[certs] Generating "etcd/server" certificate and key
[certs] etcd/server serving cert is signed for DNS names [master-buaa-151 localhost] and IPs [10.2.16.151 127.0.0.1 ::1]
[certs] Generating "etcd/peer" certificate and key
[certs] etcd/peer serving cert is signed for DNS names [master-buaa-151 localhost] and IPs [10.2.16.151 127.0.0.1 ::1]
[certs] Generating "ca" certificate and key
[certs] Generating "apiserver-kubelet-client" certificate and key
[certs] Generating "apiserver" certificate and key
[certs] apiserver serving cert is signed for DNS names [master-buaa-151 kubernetes kubernetes.default kubernetes.default.svc kubernetes.default.svc.cluster.local] and IPs [10.96.0.1 10.2.16.151]
[certs] Generating "front-proxy-ca" certificate and key
[certs] Generating "front-proxy-client" certificate and key
[certs] Generating "sa" key and public key
[kubeconfig] Using kubeconfig folder "/etc/kubernetes"
[kubeconfig] Writing "admin.conf" kubeconfig file
[kubeconfig] Writing "kubelet.conf" kubeconfig file
[kubeconfig] Writing "controller-manager.conf" kubeconfig file
[kubeconfig] Writing "scheduler.conf" kubeconfig file
[control-plane] Using manifest folder "/etc/kubernetes/manifests"
[control-plane] Creating static Pod manifest for "kube-apiserver"
[control-plane] Creating static Pod manifest for "kube-controller-manager"
[control-plane] Creating static Pod manifest for "kube-scheduler"
[etcd] Creating static Pod manifest for local etcd in "/etc/kubernetes/manifests"
[wait-control-plane] Waiting for the kubelet to boot up the control plane as static Pods from directory "/etc/kubernetes/manifests". This can take up to 4m0s
[kubelet-check] Initial timeout of 40s passed.
[apiclient] All control plane components are healthy after 48.012878 seconds
[upload-config] Storing the configuration used in ConfigMap "kubeadm-config" in the "kube-system" Namespace
[kubelet] Creating a ConfigMap "kubelet-config-1.15" in namespace kube-system with the configuration for the kubelets in the cluster
[upload-certs] Skipping phase. Please see --upload-certs
[mark-control-plane] Marking the node master-buaa-151 as control-plane by adding the label "node-role.kubernetes.io/master=''"
[mark-control-plane] Marking the node master-buaa-151 as control-plane by adding the taints [node-role.kubernetes.io/master:NoSchedule]
[bootstrap-token] Using token: abcdef.0123456789abcdef
[bootstrap-token] Configuring bootstrap tokens, cluster-info ConfigMap, RBAC Roles
[bootstrap-token] configured RBAC rules to allow Node Bootstrap tokens to post CSRs in order for nodes to get long term certificate credentials
[bootstrap-token] configured RBAC rules to allow the csrapprover controller automatically approve CSRs from a Node Bootstrap Token
[bootstrap-token] configured RBAC rules to allow certificate rotation for all node client certificates in the cluster
[bootstrap-token] Creating the "cluster-info" ConfigMap in the "kube-public" namespace
[addons] Applied essential addon: CoreDNS
[addons] Applied essential addon: kube-proxy
Your Kubernetes control-plane has initialized successfully!
To start using your cluster, you need to run the following as a regular user:
mkdir -p $HOME/.kube
sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
sudo chown $(id -u):$(id -g) $HOME/.kube/config
You should now deploy a pod network to the cluster.
Run "kubectl apply -f [podnetwork].yaml" with one of the options listed at:
https://kubernetes.io/docs/concepts/cluster-administration/addons/
Then you can join any number of worker nodes by running the following on each as root:
kubeadm join 10.2.16.151:6443 --token abcdef.0123456789abcdef \
--discovery-token-ca-cert-hash sha256:906eeda5c63878bb94a2435514149b5cfa30c8f2310e926f7882ca2bd0daf744
注意这里的 kubeadm join ...,是node加入到master的钥匙。
4. 执行集群的基本配置
mkdir -p $HOME/.kube
sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
sudo chown $(id -u):$(id -g) $HOME/.kube/config
验证集群安装成功
[root@master-buaa-151 root]# kubectl get nodes
NAME STATUS ROLES AGE VERSION
master-buaa-151 NotReady master 3m1s v1.15.3
[root@master-buaa-151 root]# kubectl get cs
NAME STATUS MESSAGE ERROR
scheduler Healthy ok
controller-manager Healthy ok
etcd-0 Healthy {"health":"true"}
目前只有一个master且是NotReady状态。
5. 配置集群内部通信flannel网络
- 下载flannel配置文件
wget https://raw.githubusercontent.com/coreos/flannel/master/Documentation/kube-flannel.yml
- 修改net-conf.json,确保Network和kubeadm.conf中的podSubnet地址一致
net-conf.json: |
{
"Network": "10.244.0.0/16",
"Backend": {
"Type": "vxlan"
}
}
- 配置flannel
[root@master-buaa-151 root]# kubectl apply -f kube-flannel.yml
#第一次创建的话,是created
podsecuritypolicy.policy/psp.flannel.unprivileged created
clusterrole.rbac.authorization.k8s.io/flannel created
clusterrolebinding.rbac.authorization.k8s.io/flannel created
serviceaccount/flannel created
configmap/kube-flannel-cfg created
daemonset.apps/kube-flannel-ds-amd64 created
daemonset.apps/kube-flannel-ds-arm64 created
daemonset.apps/kube-flannel-ds-arm created
daemonset.apps/kube-flannel-ds-ppc64le created
daemonset.apps/kube-flannel-ds-s390x created
- 验证节点
[root@master-buaa-151 root]# kubectl get nodes
NAME STATUS ROLES AGE VERSION
master-buaa-151 Ready master 16m v1.15.3
# 可能执行该命令后,返回的状态仍然为NotReady,请耐心等待几分钟即可
K8S Node节点配置
- 确认外部环境
- 关闭selinux
#关闭selinux
setenforce 0
#查看
getenforce
- 关闭swap
swapoff -a
- 确认kubelet已经成为后台服务,开机启动
[root@node-buaa-151-150 ~]# systemctl status kubelet
● kubelet.service - kubelet: The Kubernetes Node Agent
Loaded: loaded (/usr/lib/systemd/system/kubelet.service; disabled; vendor preset: disabled)
Drop-In: /usr/lib/systemd/system/kubelet.service.d
└─10-kubeadm.conf
Active: inactive (dead)
Docs: https://kubernetes.io/docs/
- 将master的/etc/kubernetes/admin.conf传给node
scp /etc/kubernetes/admin.conf root@10.2.16.150:/home/node1
scp /etc/kubernetes/admin.conf root@10.2.16.152:/home/node2
可以看到node1和node2已经有了admin.conf文件
[root@node-buaa-151-150 node1]# ls
admin.conf
- 将master中的kube-flannel.yml传送给node1和node2
scp /home/root/kube-flannel.yml root@10.2.16.150:/home/node1
scp /home/root/kube-flannel.yml root@10.2.16.152:/home/node2
- 登录node创建kube配置文件环境
mkdir -p $HOME/.kube
cp -i /home/node1/admin.conf $HOME/.kube/config
chown $(id -u):$(id -g) $HOME/.kube/config
- node节点加入master,成为集群
kubeadm join 10.2.16.151:6443 --token abcdef.0123456789abcdef \
--discovery-token-ca-cert-hash sha256:906eeda5c63878bb94a2435514149b5cfa30c8f2310e926f7882ca2bd0daf744
如果不出意外。输出
[root@node-buaa-151-150 node1]# kubeadm join 10.2.16.151:6443 --token abcdef.0123456789abcdef --discovery-token-ca-cert-hash sha256:906eeda5c63878bb94a2435514149b5cfa30c8f2310e926f7882ca2bd0daf744
[preflight] Running pre-flight checks
[WARNING IsDockerSystemdCheck]: detected "cgroupfs" as the Docker cgroup driver. The recommended driver is "systemd". Please follow the guide at https://kubernetes.io/docs/setup/cri/
[WARNING Hostname]: hostname "node-buaa-151-150" could not be reached
[WARNING Hostname]: hostname "node-buaa-151-150": lookup node-buaa-151-150 on 8.8.8.8:53: no such host
[WARNING Service-Kubelet]: kubelet service is not enabled, please run 'systemctl enable kubelet.service'
[preflight] Reading configuration from the cluster...
[preflight] FYI: You can look at this config file with 'kubectl -n kube-system get cm kubeadm-config -oyaml'
[kubelet-start] Downloading configuration for the kubelet from the "kubelet-config-1.15" ConfigMap in the kube-system namespace
[kubelet-start] Writing kubelet configuration to file "/var/lib/kubelet/config.yaml"
[kubelet-start] Writing kubelet environment file with flags to file "/var/lib/kubelet/kubeadm-flags.env"
[kubelet-start] Activating the kubelet service
[kubelet-start] Waiting for the kubelet to perform the TLS Bootstrap...
This node has joined the cluster:
* Certificate signing request was sent to apiserver and a response was received.
* The Kubelet was informed of the new secure connection details.
Run 'kubectl get nodes' on the control-plane to see this node join the cluster.
如果曾经k8s没有卸载干净,会出现
- 曾经的配置文件没有删除
[root@node-buaa-151-152 node2]# kubeadm join 10.2.16.151:6443 --token abcdef.0123456789abcdef --discovery-token-ca-cert-hash sha256:906eeda5c63878bb94a2435514149b5cfa30c8f2310e926f7882ca2bd0daf744
[preflight] Running pre-flight checks
[WARNING IsDockerSystemdCheck]: detected "cgroupfs" as the Docker cgroup driver. The recommended driver is "systemd". Please follow the guide at https://kubernetes.io/docs/setup/cri/
[WARNING Hostname]: hostname "node-buaa-151-152" could not be reached
[WARNING Hostname]: hostname "node-buaa-151-152": lookup node-buaa-151-152 on 8.8.8.8:53: no such host
error execution phase preflight: [preflight] Some fatal errors occurred:
[ERROR FileAvailable--etc-kubernetes-kubelet.conf]: /etc/kubernetes/kubelet.conf already exists
[ERROR FileAvailable--etc-kubernetes-bootstrap-kubelet.conf]: /etc/kubernetes/bootstrap-kubelet.conf already exists
[ERROR Port-10250]: Port 10250 is in use
[ERROR FileAvailable--etc-kubernetes-pki-ca.crt]: /etc/kubernetes/pki/ca.crt already exists
[preflight] If you know what you are doing, you can make a check non-fatal with `--ignore-preflight-errors=...`
解决方法是:
rm -rf /etc/kubernetes/kubelet.conf /etc/kubernetes/pki/ca.crt /etc/kubernetes/bootstrap-kubelet.conf
kubeadm reset
systemctl restart kubelet
# 然后再执行kubeadm join
- 流量转接
[root@node-buaa-151-150 ~]# kubeadm join 10.2.16.151:6443 --token abcdef.0123456789abcdef --discovery-token-ca-cert-hash sha256:906eeda5c63878bb94a2435514149b5cfa30c8f2310e926f7882ca2bd0daf744
[preflight] Running pre-flight checks
[WARNING IsDockerSystemdCheck]: detected "cgroupfs" as the Docker cgroup driver. The recommended driver is "systemd". Please follow the guide at https://kubernetes.io/docs/setup/cri/
[WARNING Hostname]: hostname "node-buaa-151-150" could not be reached
[WARNING Hostname]: hostname "node-buaa-151-150": lookup node-buaa-151-150 on 8.8.8.8:53: no such host
error execution phase preflight: [preflight] Some fatal errors occurred:
[ERROR FileContent--proc-sys-net-bridge-bridge-nf-call-iptables]: /proc/sys/net/bridge/bridge-nf-call-iptables contents are not set to 1
[preflight] If you know what you are doing, you can make a check non-fatal with `--ignore-preflight-errors=...`
解决方法是
echo 1 > /proc/sys/net/bridge/bridge-nf-call-iptables
echo 1 > /proc/sys/net/bridge/bridge-nf-call-ip6tables
- 设置node的flannel网络
[root@node-buaa-151-150 node1]# kubectl apply -f /home/node1/kube-flannel.yml
podsecuritypolicy.policy/psp.flannel.unprivileged configured
clusterrole.rbac.authorization.k8s.io/flannel unchanged
clusterrolebinding.rbac.authorization.k8s.io/flannel unchanged
serviceaccount/flannel unchanged
configmap/kube-flannel-cfg unchanged
daemonset.apps/kube-flannel-ds-amd64 unchanged
daemonset.apps/kube-flannel-ds-arm64 unchanged
daemonset.apps/kube-flannel-ds-arm unchanged
daemonset.apps/kube-flannel-ds-ppc64le unchanged
daemonset.apps/kube-flannel-ds-s390x unchanged
输入ip route show
验证。
如果验证失败(apply的输出均为unchanged,网络仍未生效),则使用
#注意,生产不能运行这一步,所有的pod都会运行不了,失去通信
kubectl delete -f kube-flannel.yml
kubectl apply -f /home/node1/kube-flannel.yml
- 验证成功的标志:
[root@node-buaa-151-150 node1]# kubectl get nodes
NAME STATUS ROLES AGE VERSION
master-buaa-151 Ready master 89m v1.15.3
node-buaa-151-150 NotReady <none> 17m v1.15.3
node-buaa-151-152 Ready <none> 3m27s v1.15.3
node已经由NotReady改为Ready,如果没有变,就再耐心等待一会。
至此,K8s集群搭建完毕。HA暂未实现。
安装WeaveScope
#安装插件
kubectl apply -f "https://cloud.weave.works/k8s/scope.yaml?k8s-version=$(kubectl version | base64 | tr -d '\n')"
#设置端口映射,浏览器打开,需要等待容器启动,多执行几次
kubectl port-forward --address=0.0.0.0 -n weave "$(kubectl get -n weave pod --selector=weave-scope-component=app -o jsonpath='{.items..metadata.name}')" 4040
安装dashboard
kubectl apply -f https://raw.githubusercontent.com/kubernetes/dashboard/v2.0.0-beta3/aio/deploy/recommended.yaml
# 注意代理需要在pod所在的机器上
kubectl proxy --address='0.0.0.0' --accept-hosts='^*$'
https://{pod-alive}:6443/api/v1/namespaces/kube-system/services/https:kubernetes-dashboard:/proxy
问题和解决方案汇总
kubectl describe node node-name
- DiskPressure
当出现磁盘压力的时候DiskPressure=True.默认镜像下载的缓存文件存储在/var(2G的容量)目录下,重试次数过多会让这个目录不够用。可以使用
df -h
查看容量大小;使用yum clean all
可以清理所有的缓存和包等。 当出现这个问题的时候,node是NotReady状态。
注意事项
- Master重启后需要一段时间才能ready,耐心等待