创建etcd集群
下载用于生成证书的工具
# Download the cfssl certificate tools.
# NOTE: the old pkg.cfssl.org download site has been discontinued;
# fetch the binaries from the official cloudflare/cfssl GitHub releases.
wget https://github.com/cloudflare/cfssl/releases/download/v1.6.4/cfssl_1.6.4_linux_amd64
wget https://github.com/cloudflare/cfssl/releases/download/v1.6.4/cfssljson_1.6.4_linux_amd64
# Install the tools under their conventional names
mv cfssl_1.6.4_linux_amd64 /usr/bin/cfssl
mv cfssljson_1.6.4_linux_amd64 /usr/bin/cfssljson
# Make the tools executable
chmod +x /usr/bin/cfssl /usr/bin/cfssljson
生成ca证书
自签证书颁发机构(CA)
ca配置文件
# Create a working directory for the etcd TLS material
mkdir -p ~/etcd_tls
# Switch into it; abort if the cd fails so the config files
# are not silently written into the wrong directory
cd ~/etcd_tls || exit 1
# Generate the CA signing policy. The 'EOF' delimiter is quoted so the
# heredoc is written literally (no shell expansion inside the JSON).
cat > ca-config.json << 'EOF'
{
"signing": {
"default": {
"expiry": "87600h"
},
"profiles": {
"www": {
"expiry": "87600h",
"usages": [
"signing",
"key encipherment",
"server auth",
"client auth"
]
}
}
}
}
EOF
ca证书请求文件
# Write the CA certificate signing request; the quoted delimiter keeps
# the JSON literal, and tee replaces the cat-with-redirection form.
tee ca-csr.json > /dev/null << 'EOF'
{
"CN": "etcd CA",
"key": {
"algo": "rsa",
"size": 2048
},
"names": [
{
"C": "CN",
"L": "Beijing",
"ST": "Beijing"
}
]
}
EOF
生成 ca 证书 (ca.pem 和 ca-key.pem 文件)
# Generate the self-signed CA from the CSR file; cfssljson splits the
# JSON output into ca.pem (certificate) and ca-key.pem (private key).
cfssl gencert -initca ca-csr.json | cfssljson -bare ca -
使用上面创建的自签CA签发Etcd HTTPS证书
创建证书申请文件
# Write the etcd server certificate signing request. The quoted
# delimiter makes the heredoc literal; tee replaces the cat redirection.
tee server-csr.json > /dev/null << 'EOF'
{
"CN": "etcd",
"hosts": [
"172.16.16.180",
"172.16.16.181",
"172.16.16.182"
],
"key": {
"algo": "rsa",
"size": 2048
},
"names": [
{
"C": "CN",
"L": "BeiJing",
"ST": "BeiJing"
}
]
}
EOF
# NOTE: the "hosts" field must list the internal cluster IP of EVERY etcd
# node — none may be missing. Add a few spare IPs for future expansion.
生成证书 (server.pem 和 server-key.pem)
# Sign the etcd server certificate with the CA created above, using the
# "www" profile from ca-config.json (outputs server.pem / server-key.pem)
cfssl gencert -ca=ca.pem -ca-key=ca-key.pem -config=ca-config.json -profile=www server-csr.json | cfssljson -bare server
创建etcd集群所使用的的目录
# Create the etcd install layout. Put options BEFORE operands:
# "mkdir dir -p" only works with GNU coreutils and fails on BSD/POSIX mkdir.
mkdir -p /opt/etcd/{bin,cfg,ssl}
将前面生成的证书拷贝到目录
# Copy the CA and server certificates generated earlier into /opt/etcd/ssl
cp ~/etcd_tls/ca*pem ~/etcd_tls/server*pem /opt/etcd/ssl/
开始安装etcd
下载etcd
# Create a directory for downloaded packages
# (-p: do not fail if the directory already exists, e.g. on a re-run)
mkdir -p ~/software
# Abort if the cd fails so the download does not land elsewhere
cd ~/software || exit 1
# Download the etcd release tarball
wget https://github.com/etcd-io/etcd/releases/download/v3.5.8/etcd-v3.5.8-linux-amd64.tar.gz
# Unpack it
tar -xf etcd-v3.5.8-linux-amd64.tar.gz
# Install the etcd binaries into the directory created earlier
mv etcd-v3.5.8-linux-amd64/{etcd,etcdctl} /opt/etcd/bin
创建etcd配置文件
# Write the etcd member configuration for node 1. The quoted delimiter
# keeps the heredoc literal; tee replaces the cat redirection.
tee /opt/etcd/cfg/etcd.conf > /dev/null << 'EOF'
#[Member]
ETCD_NAME="etcd-1"
ETCD_DATA_DIR="/var/lib/etcd/default.etcd"
ETCD_LISTEN_PEER_URLS="https://172.16.16.180:2380"
ETCD_LISTEN_CLIENT_URLS="https://172.16.16.180:2379"
#[Clustering]
ETCD_INITIAL_ADVERTISE_PEER_URLS="https://172.16.16.180:2380"
ETCD_ADVERTISE_CLIENT_URLS="https://172.16.16.180:2379"
ETCD_INITIAL_CLUSTER="etcd-1=https://172.16.16.180:2380,etcd-2=https://172.16.16.181:2380,etcd-3=https://172.16.16.182:2380"
ETCD_INITIAL_CLUSTER_TOKEN="etcd-cluster-cvc-cvcv-ty"
ETCD_INITIAL_CLUSTER_STATE="new"
EOF
# 参数说明
ETCD_NAME : 节点名称,集群中唯一
ETCD_DATA_DIR : 数据目录
ETCD_LISTEN_PEER_URLS : 集群通信监听地址
ETCD_LISTEN_CLIENT_URLS : 客户端访问监听地址
ETCD_INITIAL_ADVERTISE_PEER_URLS : 集群通告地址
ETCD_ADVERTISE_CLIENT_URLS : 客户端通告地址
ETCD_INITIAL_CLUSTER : 集群节点地址
ETCD_INITIAL_CLUSTER_TOKEN : 集群Token
ETCD_INITIAL_CLUSTER_STATE : 加入集群的当前状态,new是新集群,existing表示加入已有集群
创建systemd服务文件管理etcd启动停止
# Install the systemd unit that manages the etcd service start/stop.
# Quoted delimiter: the unit file content is written literally.
tee /usr/lib/systemd/system/etcd.service > /dev/null << 'EOF'
[Unit]
Description=Etcd Server
After=network.target
After=network-online.target
Wants=network-online.target
[Service]
Type=notify
EnvironmentFile=/opt/etcd/cfg/etcd.conf
ExecStart=/opt/etcd/bin/etcd --cert-file=/opt/etcd/ssl/server.pem --key-file=/opt/etcd/ssl/server-key.pem --peer-cert-file=/opt/etcd/ssl/server.pem --peer-key-file=/opt/etcd/ssl/server-key.pem --trusted-ca-file=/opt/etcd/ssl/ca.pem --peer-trusted-ca-file=/opt/etcd/ssl/ca.pem --logger=zap
Restart=on-failure
LimitNOFILE=65536
[Install]
WantedBy=multi-user.target
EOF
把配置拷贝到其他 etcd 节点
# Push the etcd install tree and the systemd unit to the other two nodes.
for node in 172.16.16.181 172.16.16.182; do
  scp -r /opt/etcd/ "root@${node}:/opt/"
  scp /usr/lib/systemd/system/etcd.service "root@${node}:/usr/lib/systemd/system/"
done
启动etcd
# Reload systemd so it picks up the new etcd.service unit
systemctl daemon-reload
# Start etcd (NOTE: with a brand-new cluster the first node may appear to
# hang until the peer nodes are started — presumably waiting for quorum)
systemctl start etcd
# Start etcd automatically at boot
systemctl enable etcd
其他两个节点参数修改
# On each of the other nodes, edit the copied config (adjust the
# node name and the listen/advertise IPs for that node)
vi /opt/etcd/cfg/etcd.conf
#[Member]
ETCD_NAME="etcd-1" # 修改此处,节点2改为etcd-2,节点3改为etcd-3
ETCD_DATA_DIR="/var/lib/etcd/default.etcd"
ETCD_LISTEN_PEER_URLS="https://172.16.16.180:2380" # 修改此处为当前服务器IP
ETCD_LISTEN_CLIENT_URLS="https://172.16.16.180:2379" # 修改此处为当前服务器IP
#[Clustering]
ETCD_INITIAL_ADVERTISE_PEER_URLS="https://172.16.16.180:2380" # 修改此处为当前服务器IP
ETCD_ADVERTISE_CLIENT_URLS="https://172.16.16.180:2379" # 修改此处为当前服务器IP
ETCD_INITIAL_CLUSTER="etcd-1=https://172.16.16.180:2380,etcd-2=https://172.16.16.181:2380,etcd-3=https://172.16.16.182:2380"
ETCD_INITIAL_CLUSTER_TOKEN="etcd-cluster-cvc-cvcv-ty" # 注意:token必须与节点1的配置保持一致,否则集群无法组建
ETCD_INITIAL_CLUSTER_STATE="new"
# Start etcd on this node as well
# Reload systemd to pick up the copied unit file
systemctl daemon-reload
# Start the service now
systemctl start etcd
# Start the service automatically at boot
systemctl enable etcd
部署etcd时候可能会出现的问题
问题一
# 因为是云服务器环境,所以说如果按着上面的etcd配置文件来配置的话会导致节点不通的情况
[root@master ~]# tcpdump -i eth0 -s0 -nnn port 2379
dropped privs to tcpdump
tcpdump: verbose output suppressed, use -v or -vv for full protocol decode
listening on eth0, link-type EN10MB (Ethernet), capture size 262144 bytes
08:51:08.245269 IP 39.86.24.130.42548 > 10.120.98.121.2379: Flags [S], seq 3202545622, win 64240, options [mss 1410,sackOK,TS val 850462034 ecr 0,nop,wscale 8], length 0
08:51:08.245284 IP 10.120.98.121.2379 > 39.86.24.130.42548: Flags [R.], seq 0, ack 3202545623, win 0, length 0
如上面的抓包输出所示,etcd只监听在内网ip上,但客户端请求经过云服务器环境的nat转换后,etcd不会接受该数据
解决办法
#[Member]
ETCD_NAME="etcd-1"
ETCD_DATA_DIR="/var/lib/etcd/default.etcd"
ETCD_LISTEN_PEER_URLS="https://0.0.0.0:2380" # 将etcd监听的地址改为0.0.0.0即可
ETCD_LISTEN_CLIENT_URLS="https://0.0.0.0:2379" # 将etcd监听的地址改为0.0.0.0即可
#[Clustering]
ETCD_INITIAL_ADVERTISE_PEER_URLS="https://172.16.0.3:2380"
ETCD_ADVERTISE_CLIENT_URLS="https://172.16.0.3:2379"
ETCD_INITIAL_CLUSTER="etcd-1=https://172.16.0.3:2380,etcd-2=https://172.16.0.4:2380,etcd-3=https://172.16.0.5:2380"
ETCD_INITIAL_CLUSTER_TOKEN="etcd-cluster-yyt78888-rterdg"
ETCD_INITIAL_CLUSTER_STATE="new"
测试etcd集群是否健康
[root@master ~]# ETCDCTL_API=3 /opt/etcd/bin/etcdctl --cacert=/opt/etcd/ssl/ca.pem --cert=/opt/etcd/ssl/server.pem --key=/opt/etcd/ssl/server-key.pem --endpoints="https://172.16.0.3:2379,https://172.16.0.4:2379,https://172.16.0.5:2379" endpoint health --write-out=table
+------------------------------+--------+--------------+-------+
| ENDPOINT | HEALTH | TOOK | ERROR |
+------------------------------+--------+--------------+-------+
| https://172.16.0.3:2379 | true | 10.459873ms | |
| https://172.16.0.4:2379 | true | 23.587933ms | |
| https://172.16.0.5:2379 | true | 355.103131ms | |
+------------------------------+--------+--------------+-------+
开始部署k8s
k8s初始化配置文件
apiVersion: kubeadm.k8s.io/v1beta3
bootstrapTokens:
- groups:
  - system:bootstrappers:kubeadm:default-node-token
  token: abcdef.0123456789abcdef
  ttl: 24h0m0s
  usages:
  - signing
  - authentication
kind: InitConfiguration
localAPIEndpoint:
  advertiseAddress: 172.16.0.3 # change this to the node's advertise (public) IP
  bindPort: 6443
nodeRegistration:
  criSocket: unix:///run/containerd/containerd.sock
  imagePullPolicy: IfNotPresent
  name: master # hostname of the master node
  taints: null
---
apiServer:
  timeoutForControlPlane: 4m0s
apiVersion: kubeadm.k8s.io/v1beta3
certificatesDir: /etc/kubernetes/pki
clusterName: kubernetes
controllerManager: {}
dns: {}
etcd:
  external:
    endpoints: # change these to the addresses of the etcd cluster deployed above
    - https://172.16.0.3:2379
    - https://172.16.0.4:2379
    - https://172.16.0.5:2379
    caFile: /opt/etcd/ssl/ca.pem # CA certificate used to verify the etcd servers
    certFile: /opt/etcd/ssl/server.pem # client certificate for the apiserver
    keyFile: /opt/etcd/ssl/server-key.pem # private key for the client certificate
imageRepository: registry.aliyuncs.com/google_containers
kind: ClusterConfiguration
kubernetesVersion: 1.27.0
controlPlaneEndpoint: 172.16.0.3:6443 # required here, otherwise joining a second control plane fails later
networking:
  dnsDomain: cluster.local
  serviceSubnet: 10.96.0.0/12
  podSubnet: 10.244.0.0/16 # pod network CIDR
scheduler: {}
---
apiVersion: kubeproxy.config.k8s.io/v1alpha1
kind: KubeProxyConfiguration
mode: ipvs # make kube-proxy forward traffic in IPVS mode
初始化
[root@master ~]# kubeadm init --config kube-config.yaml
产生如下输出
To start using your cluster, you need to run the following as a regular user:
mkdir -p $HOME/.kube
sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
sudo chown $(id -u):$(id -g) $HOME/.kube/config
Alternatively, if you are the root user, you can run:
export KUBECONFIG=/etc/kubernetes/admin.conf
You should now deploy a pod network to the cluster.
Run "kubectl apply -f [podnetwork].yaml" with one of the options listed at:
https://kubernetes.io/docs/concepts/cluster-administration/addons/
Then you can join any number of worker nodes by running the following on each as root:
kubeadm join 172.16.0.3:6443 --token abcdef.0123456789abcdef \
--discovery-token-ca-cert-hash sha256:7919df493c3jkk9fe545a70c6e3c1d9iooi
按照上面的提示执行命令
# Set up kubectl access for the current user, exactly as kubeadm instructed
mkdir -p $HOME/.kube
sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
sudo chown $(id -u):$(id -g) $HOME/.kube/config
# 可以看到控制层面创建成功
[root@master ~]# kubectl get node
NAME STATUS ROLES AGE VERSION
master Ready control-plane 8m7s v1.27.1
# 加入工作节点
在工作节点执行命令
kubeadm join 172.16.0.3:6443 --token abcdef.0123456789abcdef \
--discovery-token-ca-cert-hash sha256:7919df493c3jkk9fe545a70c6e3c1d9iooi
添加第二个控制层面
# 执行命令
[root@master ~]# kubeadm init phase upload-certs --upload-certs
[upload-certs] Storing the certificates in Secret "kubeadm-certs" in the "kube-system" Namespace
[upload-certs] Using certificate key:
57e92e1c0039191f73898984c78cfe5f6f487bd4d8624454f3a850f4a1sh
# 执行命令
[root@master ~]# kubeadm token create --print-join-command
kubeadm join 172.16.0.3:6443 --token qk5fng.khu1iwra4eccvc62 --discovery-token-ca-cert-hash sha256:7919df493c373bae9e0yuyu70c6e3c1d931dcc2ed756ae
# 将上面得到的两个拼接
kubeadm join 172.16.0.3:6443 --token qk5fng.khu1iwra4eccvc62 --discovery-token-ca-cert-hash sha256:7919df493c373bae9e0yuyu70c6e3c1d931dcc2ed756ae --control-plane --certificate-key 57e92e1c0039191f73898984c78cfe5f6f487bd4d8624454f3a850f4a1sh
# 注意:要加上--control-plane --certificate-key ,不然就会添加为node节点而不是master
执行添加命令时候会有如下输出
[root@master2 ~]# kubeadm join 172.16.0.3:6443 --token qk5fng.khu1iwra4eccvc62 --discovery-token-ca-cert-hash sha256:7919df493c373bae9e0yuyu70c6e3c1d931dcc2ed756ae --control-plane --certificate-key 57e92e1c0039191f73898984c78cfe5f6f487bd4d8624454f3a850f4a1sh
[preflight] Running pre-flight checks
[preflight] Reading configuration from the cluster...
[preflight] FYI: You can look at this config file with 'kubectl -n kube-system get cm kubeadm-config -o yaml'
error execution phase preflight:
One or more conditions for hosting a new control plane instance is not satisfied.
unable to add a new control plane instance to a cluster that doesn't have a stable controlPlaneEndpoint address
Please ensure that:
* The cluster has a stable controlPlaneEndpoint address.
* The certificates that must be shared among control plane instances are provided.
To see the stack trace of this error execute with --v=5 or higher
解决办法
修改master的endpoint地址
kubectl edit cm kubeadm-config -n kube-system
修改结果如下
修改完后再次在第二个master上执行初始化命令
# 显示如下报错,缺少etcd的证书文件
error execution phase control-plane-prepare/download-certs: error downloading certs: the Secret does not include the required certificate or key - name: external-etcd-ca.crt, path: /opt/etcd/ssl/ca.pem
To see the stack trace of this error execute with --v=5 or higher
# 因为是使用的外部etcd所以要使用如下命令生成token
kubeadm init phase upload-certs --upload-certs --config kube-config.yaml
再次执行添加master后成功
* Certificate signing request was sent to apiserver and approval was received.
* The Kubelet was informed of the new secure connection details.
* Control plane label and taint were applied to the new node.
* The Kubernetes control plane instances scaled up.
To start administering your cluster from this node, you need to run the following as a regular user:
mkdir -p $HOME/.kube
sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
sudo chown $(id -u):$(id -g) $HOME/.kube/config
Run 'kubectl get nodes' to see this node join the cluster.