Installing a Highly Available Kubernetes Cluster Offline


(Figure: k8s high-availability deployment architecture diagram)

Chapter 1: Offline etcd Cluster Installation

1.1 Installing the cfssl tools

wget https://pkg.cfssl.org/R1.2/cfssl_linux-amd64

wget https://pkg.cfssl.org/R1.2/cfssljson_linux-amd64

wget https://pkg.cfssl.org/R1.2/cfssl-certinfo_linux-amd64

chmod +x cfssl_linux-amd64 cfssljson_linux-amd64 cfssl-certinfo_linux-amd64

mv cfssl_linux-amd64 /usr/local/bin/cfssl

mv cfssljson_linux-amd64 /usr/local/bin/cfssljson

mv cfssl-certinfo_linux-amd64 /usr/bin/cfssl-certinfo
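
Optionally, a quick sanity check that the tools are installed and on the PATH:

# Print the cfssl version to confirm the binary works
cfssl version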

1.2 Generating CA-issued certificates

1.2.1 Generating the CA

# 1. Self-signed certificate authority (CA)
# 876000 hours ≈ 100 years

# On certificate expiry: https://www.cnblogs.com/LiuChang-blog/p/15916998.html
mkdir -p ~/etcd_tls
cd ~/etcd_tls

The cfssl configuration file:

cat > ca-config.json << EOF
{
  "signing": {
    "default": {
      "expiry": "876000h"
    },
    "profiles": {
      "kubernetes": {
         "expiry": "876000h",
         "usages": [
            "signing",
            "key encipherment",
            "server auth",
            "client auth"
        ]
      }
    }
}
}
EOF
 
# Self-signed CA certificate signing request (CSR) file:

cat > ca-csr.json << EOF
{
    "CA":{"expiry":"876000h"},
    "CN": "kubernetes",
    "key": {
        "algo": "rsa",
        "size": 2048
    },
    "names": [
        {
            "C": "CN",
            "L": "Xian",
            "ST": "Xian",
            "O": "k8s",
            "OU": "System"
        }
    ]
}
EOF
 

# Generate the CA certificate and private key:

cfssl gencert -initca ca-csr.json | cfssljson -bare ca -

# This produces ca.pem and ca-key.pem.

1.2.2 Issuing the etcd certificate with the CA


# Create the certificate signing request file:

cat > etcd-csr.json << EOF
{
    "CN": "etcd",
    "hosts": [

      "192.167.14.228",
      "192.167.14.229",
      "192.167.14.246"

    ],
    "key": {
        "algo": "rsa",
        "size": 2048
    },
    "names": [
        {
            "C": "CN",
            "L": "Xian",
            "ST": "Xian",
            "O": "k8s",
            "OU": "System"
        }
    ]
}
EOF
 

# Note: the IPs in the hosts field above are the internal cluster-communication IPs of all etcd nodes; none may be omitted. To make future expansion easier, you can list a few extra reserved IPs.
# Generate the certificate:

cfssl gencert -ca=ca.pem -ca-key=ca-key.pem -config=ca-config.json -profile=kubernetes \
etcd-csr.json | cfssljson -bare etcd

# This produces etcd.pem and etcd-key.pem.
# Check the certificate validity period:
cfssl certinfo -cert etcd.pem | grep not

1.3 Installing the etcd cluster

1.3.1 Preparing resources

# Three CentOS 7 servers
192.167.14.228
192.167.14.229
192.167.14.246
# Disable the firewall. Note: in production, keep the firewall enabled and open the required ports instead

systemctl stop firewalld
systemctl disable firewalld
systemctl status firewalld
# Prepare the installation package
wget https://github.com/etcd-io/etcd/releases/download/v3.5.9/etcd-v3.5.9-linux-amd64.tar.gz
etcd-v3.5.9-linux-amd64.tar.gz

1.3.2 Installing the etcd binaries

mkdir /opt/etcd/{bin,cfg,ssl} -p

tar zxvf etcd-v3.5.9-linux-amd64.tar.gz

mv etcd-v3.5.9-linux-amd64/{etcd,etcdctl} /opt/etcd/bin/

1.3.3 Creating the etcd configuration files

# On server 192.167.14.228:

cat > /opt/etcd/cfg/etcd.conf << EOF

#[Member]

ETCD_NAME="etcd-1"

ETCD_DATA_DIR="/var/lib/etcd/default.etcd"

ETCD_LISTEN_PEER_URLS="https://192.167.14.228:2380"

ETCD_LISTEN_CLIENT_URLS="https://192.167.14.228:2379"

#[Clustering]

ETCD_INITIAL_ADVERTISE_PEER_URLS="https://192.167.14.228:2380"

ETCD_ADVERTISE_CLIENT_URLS="https://192.167.14.228:2379"

ETCD_INITIAL_CLUSTER="etcd-1=https://192.167.14.228:2380,etcd-2=https://192.167.14.229:2380,etcd-3=https://192.167.14.246:2380"

ETCD_INITIAL_CLUSTER_TOKEN="etcd-cluster"

ETCD_INITIAL_CLUSTER_STATE="new"

EOF
# On server 192.167.14.229:
cat > /opt/etcd/cfg/etcd.conf << EOF

#[Member]

ETCD_NAME="etcd-2"

ETCD_DATA_DIR="/var/lib/etcd/default.etcd"

ETCD_LISTEN_PEER_URLS="https://192.167.14.229:2380"

ETCD_LISTEN_CLIENT_URLS="https://192.167.14.229:2379"

 

#[Clustering]

ETCD_INITIAL_ADVERTISE_PEER_URLS="https://192.167.14.229:2380"

ETCD_ADVERTISE_CLIENT_URLS="https://192.167.14.229:2379"

ETCD_INITIAL_CLUSTER="etcd-1=https://192.167.14.228:2380,etcd-2=https://192.167.14.229:2380,etcd-3=https://192.167.14.246:2380"

ETCD_INITIAL_CLUSTER_TOKEN="etcd-cluster"

ETCD_INITIAL_CLUSTER_STATE="new"

EOF
# On server 192.167.14.246:

cat > /opt/etcd/cfg/etcd.conf << EOF

#[Member]

ETCD_NAME="etcd-3"

ETCD_DATA_DIR="/var/lib/etcd/default.etcd"

ETCD_LISTEN_PEER_URLS="https://192.167.14.246:2380"

ETCD_LISTEN_CLIENT_URLS="https://192.167.14.246:2379"

#[Clustering]

ETCD_INITIAL_ADVERTISE_PEER_URLS="https://192.167.14.246:2380"

ETCD_ADVERTISE_CLIENT_URLS="https://192.167.14.246:2379"

ETCD_INITIAL_CLUSTER="etcd-1=https://192.167.14.228:2380,etcd-2=https://192.167.14.229:2380,etcd-3=https://192.167.14.246:2380"

ETCD_INITIAL_CLUSTER_TOKEN="etcd-cluster"

ETCD_INITIAL_CLUSTER_STATE="new"

EOF

1.3.4 Registering etcd as a systemd service

# Create the etcd.service unit file on node 228
cat > /usr/lib/systemd/system/etcd.service << EOF
[Unit]
Description=Etcd Server
After=network.target
After=network-online.target
Wants=network-online.target
[Service]
Type=notify
EnvironmentFile=/opt/etcd/cfg/etcd.conf
ExecStart=/opt/etcd/bin/etcd \
  --cert-file=/opt/etcd/ssl/etcd.pem \
  --key-file=/opt/etcd/ssl/etcd-key.pem \
  --client-cert-auth=true \
  --trusted-ca-file=/opt/etcd/ssl/ca.pem \
  --peer-cert-file=/opt/etcd/ssl/etcd.pem \
  --peer-key-file=/opt/etcd/ssl/etcd-key.pem \
  --peer-client-cert-auth=true \
  --peer-trusted-ca-file=/opt/etcd/ssl/ca.pem
Restart=on-failure
LimitNOFILE=65536
[Install]
WantedBy=multi-user.target
EOF
# Copy the unit file to nodes 229 and 246
scp /usr/lib/systemd/system/etcd.service root@192.167.14.229:/usr/lib/systemd/system/

scp /usr/lib/systemd/system/etcd.service root@192.167.14.246:/usr/lib/systemd/system/

1.3.5 Distributing the etcd certificates to the nodes

# Copy the certificates generated with cfssl to nodes 228, 229 and 246
# Certificates to copy:
ca.pem
etcd.pem
etcd-key.pem
# Destination directory:
/opt/etcd/ssl/

#cp ~/etcd_tls/ca*pem ~/etcd_tls/etcd*pem /opt/etcd/ssl/

#cp /home/k8s/ca*pem /home/k8s/etcd*pem /opt/etcd/ssl/


# Copy the certificate files to the master nodes 119, 223 and 226
 scp -r /opt/etcd/ root@192.167.14.119:/opt/
 scp -r /opt/etcd/ root@192.167.14.223:/opt/
 scp -r /opt/etcd/ root@192.167.14.226:/opt/

1.3.6 Starting etcd and enabling it at boot

# Start etcd and enable it at boot (run on all three etcd nodes; the first node may appear to hang until a second member comes up)

systemctl daemon-reload

systemctl start etcd
systemctl stop etcd
# Start etcd automatically at system boot
systemctl enable etcd
# Check etcd status
systemctl status etcd

# Check cluster health

ETCDCTL_API=3 /opt/etcd/bin/etcdctl --cacert=/opt/etcd/ssl/ca.pem --cert=/opt/etcd/ssl/etcd.pem --key=/opt/etcd/ssl/etcd-key.pem --endpoints="https://192.167.14.228:2379,https://192.167.14.229:2379,https://192.167.14.246:2379" endpoint health --write-out=table
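
Beyond endpoint health, it can help to confirm membership and which node is currently the leader; a sketch using the same certificate flags and endpoints as above:

# List cluster members
ETCDCTL_API=3 /opt/etcd/bin/etcdctl --cacert=/opt/etcd/ssl/ca.pem --cert=/opt/etcd/ssl/etcd.pem --key=/opt/etcd/ssl/etcd-key.pem --endpoints="https://192.167.14.228:2379,https://192.167.14.229:2379,https://192.167.14.246:2379" member list --write-out=table
# Per-endpoint status, including the IS LEADER column
ETCDCTL_API=3 /opt/etcd/bin/etcdctl --cacert=/opt/etcd/ssl/ca.pem --cert=/opt/etcd/ssl/etcd.pem --key=/opt/etcd/ssl/etcd-key.pem --endpoints="https://192.167.14.228:2379,https://192.167.14.229:2379,https://192.167.14.246:2379" endpoint status --write-out=table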

# Note: in production, open these ports instead of disabling the firewall

firewall-cmd --zone=public --add-port=2379/tcp --permanent
firewall-cmd --zone=public --add-port=2380/tcp --permanent
firewall-cmd --reload

Chapter 2: Offline containerd Installation

2.1 Offline containerd installation

2.1.1 Preparing the installation package

# 1. Upload to all nodes
containerd-1.7.2-linux-amd64.tar.gz

# 2. Create the containerd directory and extract
mkdir /root/containerd
tar -C /root/containerd -zxvf containerd-1.7.2-linux-amd64.tar.gz
# 3. Persist the PATH change and apply it
echo 'export PATH=$PATH:/usr/local/bin:/usr/local/sbin' >> ~/.bashrc && source ~/.bashrc
# 4. Install the containerd binaries
cd /root/containerd/bin
cp * /usr/bin
cp ctr /usr/local/bin

2.1.2 Modifying the configuration file

# 1. Generate the default containerd configuration file
mkdir -p /etc/containerd/ && containerd config default > /etc/containerd/config.toml

# 2. Replace it with the following configuration
cat > /etc/containerd/config.toml << EOF
oom_score = 0
# location where containerd stores container data
root = "/home/containerd"
state = "/run/containerd"
version = 2
[cgroup]
  path = ""
[debug]
  address = ""
  format = ""
  gid = 0
  level = ""
  uid = 0
[grpc]
  address = "/run/containerd/containerd.sock"
  gid = 0
  max_recv_message_size = 16777216
  max_send_message_size = 16777216
  tcp_address = ""
  tcp_tls_ca = ""
  tcp_tls_cert = ""
  tcp_tls_key = ""
  uid = 0
[metrics]
  address = ""
  grpc_histogram = false
[plugins."io.containerd.grpc.v1.cri"]
  sandbox_image = "k8s.gcr.io/pause:3.9"
  [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc]
    runtime_type = "io.containerd.runc.v2"
    # SystemdCgroup only takes effect under the runc options table
    [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc.options]
      SystemdCgroup = true
  [plugins."io.containerd.grpc.v1.cri".registry]
    [plugins."io.containerd.grpc.v1.cri".registry.mirrors]

EOF

# If you have a private image registry, add the following

      [plugins."io.containerd.grpc.v1.cri".registry.mirrors."registry.k8s.io"]
        endpoint = ["http://192.167.14.119:5000"]    # use your own registry address
      [plugins."io.containerd.grpc.v1.cri".registry.mirrors."docker.io"]
        endpoint = ["http://192.167.14.119:5000"]    # use your own registry address

2.1.3 Registering containerd as a systemd service

# 1. Create the containerd systemd unit file
cat > /usr/lib/systemd/system/containerd.service <<EOF
[Unit]
Description=containerd container runtime
Documentation=https://containerd.io
After=network.target
 
[Service]
ExecStartPre=/sbin/modprobe overlay
ExecStart=/usr/bin/containerd --config /etc/containerd/config.toml
Restart=always
RestartSec=5
LimitNOFILE=infinity
 
[Install]
WantedBy=multi-user.target
EOF

# 2. Enable automatic start

systemctl daemon-reload

systemctl start containerd

systemctl stop containerd

systemctl enable containerd

systemctl restart containerd

# 3. Check the version
containerd --version
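
To confirm the edited config.toml parses and the key settings took effect, you can dump the merged configuration after the restart (a quick optional check):

# The merged config should show the pause image and the systemd cgroup setting
containerd config dump | grep -E "sandbox_image|SystemdCgroup"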

2.2 Offline HAProxy installation

2.2.1 Downloading and building HAProxy

# Install on the master nodes
192.167.14.119   haproxy+keepalived   master119   
192.167.14.223   haproxy+keepalived   master223
192.167.14.226   haproxy+keepalived   master226

# Download HAProxy
wget https://www.haproxy.org/download/3.1/src/haproxy-3.1.1.tar.gz
# Extract
tar -zxvf haproxy-3.1.1.tar.gz -C /usr/local
# Enter the directory, compile, and install (HAProxy 2.x/3.x uses the linux-glibc build target)
cd /usr/local/haproxy-3.1.1

make TARGET=linux-glibc PREFIX=/usr/local/haproxy
make install PREFIX=/usr/local/haproxy
# Create the logs directory
mkdir /usr/local/haproxy/logs

2.2.2 Registering HAProxy as a system service


# Write the init script

vim /etc/rc.d/init.d/haproxy


#!/bin/bash
# chkconfig: 2345 10 90
# description: HAProxy

BASE_DIR="/usr/local/haproxy"

ARGV="$@"

start() {
    echo "Starting HAProxy servers..."
    if [ -f "$BASE_DIR/haproxy.cfg" ]; then
        # -D: run as a daemon, -p: write the pid file that stop() reads
        $BASE_DIR/sbin/haproxy -f $BASE_DIR/haproxy.cfg -D -p $BASE_DIR/logs/haproxy.pid
        echo "HAProxy started successfully."
    else
        echo "HAProxy configuration file not found."
        exit 1
    fi
}

stop() {
    echo "Stopping HAProxy..."
    if [ -f "$BASE_DIR/logs/haproxy.pid" ]; then
        kill -USR1 $(cat $BASE_DIR/logs/haproxy.pid)
        echo "HAProxy stopped successfully."
    else
        echo "HAProxy PID file not found."
        exit 1
    fi
}

restart() {
    echo "Restarting HAProxy..."
    stop
    sleep 2 # Wait for a moment to ensure the process has stopped
    start
}

case $ARGV in
    start)
        start
        ERROR=$?
        ;;
    stop)
        stop
        ERROR=$?
        ;;
    restart)
        restart
        ERROR=$?
        ;;
    *)
        echo "Usage: $0 {start|stop|restart}"
        exit 1
esac

exit $ERROR




# Make the script executable
chmod +x /etc/rc.d/init.d/haproxy

# Register it as a system service
chkconfig --add haproxy
# Enable it at boot
chkconfig haproxy on

systemctl start haproxy
systemctl enable haproxy
systemctl status haproxy



# Configure haproxy.cfg
Nodes: master119, master223, master226 (all master nodes)
cat > /usr/local/haproxy/haproxy.cfg   <<EOF

global
  maxconn  2000
  ulimit-n  16384
  log  127.0.0.1 local0 err
  stats timeout 30s

defaults
  log global
  mode  http
  option  httplog
  timeout connect 5000
  timeout client  50000
  timeout server  50000
  timeout http-request 15s
  timeout http-keep-alive 15s

frontend monitor-in
  bind *:33305
  mode http
  option httplog
  monitor-uri /monitor

frontend k8s-master
  bind  0.0.0.0:16443
  bind  127.0.0.1:16443
  mode tcp
  option tcplog
  tcp-request inspect-delay 5s
  default_backend k8s-apiserver

backend k8s-apiserver
  mode tcp
  option tcp-check
  balance roundrobin
  default-server inter 10s downinter 5s rise 2 fall 2 slowstart 60s maxconn 250 maxqueue 256 weight 100
  server master119  192.167.14.119:6443  check
  server master223  192.167.14.223:6443  check
  server master226  192.167.14.226:6443  check
EOF
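
Before starting HAProxy, the configuration can be syntax-checked with the -c flag (paths as used above):

# Validate the configuration file
/usr/local/haproxy/sbin/haproxy -c -f /usr/local/haproxy/haproxy.cfg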




# Start
service haproxy start
# Stop
service haproxy stop
# Restart
service haproxy restart

# Force kill
killall haproxy

pkill -9 haproxy

# Check status

systemctl status haproxy

ss -untlp | grep 16443

netstat -lntup | grep haproxy

# Access check
# After startup you can hit the monitor endpoint defined in haproxy.cfg
# (frontend monitor-in, port 33305) to confirm HAProxy is up,
# e.g. http://192.167.14.119:33305/monitor

2.3 Offline Keepalived installation

2.3.1 Installing Keepalived

# Node information
192.167.14.119  
192.167.14.223  
192.167.14.226  

# Install keepalived (download from the page below)
https://www.keepalived.org/download.html
tar xvf keepalived-2.2.8.tar.gz
# Compile and install
cd   keepalived-2.2.8
./configure --prefix=/usr/local/keepalived --disable-track-process
make && make install

# Configure startup files
# Sysconfig file referenced by the keepalived init script; the default path is /etc/sysconfig/. Alternatively, skip the copy and point the init script at the file under the install directory.
cp /usr/local/keepalived/etc/sysconfig/keepalived  /etc/sysconfig/keepalived 
 
# Put the keepalived binary on the PATH (from the install directory)
cp /usr/local/keepalived/sbin/keepalived /usr/sbin/keepalived
 
# keepalived init script (from the source tree); placing it in /etc/init.d/ lets you manage it with the service command
cp /home/k8s/k8s/keepalived-2.2.8/keepalived/etc/init.d/keepalived  /etc/init.d/keepalived
 
# Put the configuration file in its default location
mkdir /etc/keepalived
cp /usr/local/keepalived/etc/keepalived/keepalived.conf.sample /etc/keepalived/keepalived.conf
 
# Register as a system service
chkconfig --add keepalived
# Enable at boot
chkconfig keepalived on

2.3.2 Configuring Keepalived

# keepalived configuration

Nodes: master119, master223, master226

master119:
cat > /etc/keepalived/keepalived.conf << EOF


! Configuration File for keepalived
global_defs {
    router_id LVS_DEVEL
    script_user root
    enable_script_security
}
vrrp_script chk_apiserver {
    script "/etc/keepalived/check_apiserver.sh"
    interval 5
    weight -5
    fall 2
    rise 1
}
vrrp_instance VI_1 {
    state MASTER
    interface eno3
    mcast_src_ip 192.167.14.119
    virtual_router_id 51
    priority 101
    advert_int 2
    authentication {
        auth_type PASS
        auth_pass K8SHA_KA_AUTH
    }
    virtual_ipaddress {
        192.167.14.205
    }
    track_script {
       chk_apiserver
    }
}
EOF

master223:

Node: master223
cat > /etc/keepalived/keepalived.conf << EOF

! Configuration File for keepalived
global_defs {
    router_id LVS_DEVEL
    script_user root
    enable_script_security
}
vrrp_script chk_apiserver {
    script "/etc/keepalived/check_apiserver.sh"
    interval 5
    weight -5
    fall 2
    rise 1
}
vrrp_instance VI_1 {
    state BACKUP
    interface ens192
    mcast_src_ip 192.167.14.223
    virtual_router_id 51
    priority 100
    advert_int 2
    authentication {
        auth_type PASS
        auth_pass K8SHA_KA_AUTH
    }
    virtual_ipaddress {
        192.167.14.205
    }
    track_script {
       chk_apiserver
    }
}
EOF

master226:
Node: master226
cat > /etc/keepalived/keepalived.conf << EOF

! Configuration File for keepalived
global_defs {
    router_id LVS_DEVEL
    script_user root
    enable_script_security
}
vrrp_script chk_apiserver {
    script "/etc/keepalived/check_apiserver.sh"
    interval 5
    weight -5
    fall 2
    rise 1
}
vrrp_instance VI_1 {
    state BACKUP
    interface ens192
    mcast_src_ip 192.167.14.226
    virtual_router_id 51
    priority 99
    advert_int 2
    authentication {
        auth_type PASS
        auth_pass K8SHA_KA_AUTH
    }
    virtual_ipaddress {
        192.167.14.205
    }
    track_script {
       chk_apiserver
    }
}
EOF



# Health-check script

# Quote the heredoc delimiter so the $ variables are written literally into the script
cat > /etc/keepalived/check_apiserver.sh << 'EOF'
#!/bin/bash

# Timeout (seconds)
timeout=3
err=0

# Check whether the haproxy service is running
for ((i=0; i<timeout; i++)); do
    if systemctl is-active --quiet haproxy; then
        err=0
        break
    else
        err=$(expr $err + 1)
        sleep 1
    fi
done

# If haproxy is not running, stop keepalived so the VIP fails over to another node
if [[ $err -ne 0 ]]; then
    echo "Stopping keepalived due to haproxy service not running."
    systemctl stop keepalived
    exit 1
else
    exit 0
fi
EOF

chmod +x /etc/keepalived/check_apiserver.sh
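
A quick way to verify the script before relying on it (manual run; exit code 0 means haproxy is considered healthy):

# Syntax check only
bash -n /etc/keepalived/check_apiserver.sh
# Run it and print the exit code
# (note: if haproxy is down, this will stop keepalived, as designed)
bash /etc/keepalived/check_apiserver.sh; echo "exit code: $?"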



# Start the services

Nodes: master119, master223, master226
systemctl daemon-reload
systemctl enable --now haproxy
systemctl enable --now keepalived


systemctl start keepalived

systemctl status keepalived

systemctl stop keepalived

# Install OpenSSL (needed when building keepalived)
# Check the version with `openssl version`; remove the old 1.0.x packages first

rpm -qa | grep openssl

# Remove each of them
yum remove openssl-libs-1.0.2k-25.el7_9.x86_64
yum remove xmlsec1-openssl-1.2.20-7.el7_4.x86_64
yum remove openssl-devel-1.0.2k-25.el7_9.x86_64
yum remove openssl-1.0.2k-25.el7_9.x86_64




# Extract and compile

tar -zxvf openssl-1.1.1i.tar.gz
cd openssl-1.1.1i
./config --prefix=/usr/local/openssl

make && make install

# Configure environment variables

cat << EOF >> /etc/profile
export PATH=\$PATH:/usr/local/openssl/bin
EOF

# Library directories that the dynamic linker needs to know about
/usr/local/lib64
/usr/lib64


source /etc/profile

sudo ldconfig

find / -name libcrypto.so.1.1
find / -name libssl.so.1.1

# Create symlinks
sudo ln -s /usr/local/lib64/libssl.so.1.1 /usr/lib64/libssl.so.1.1

sudo ln -s /usr/local/lib64/libcrypto.so.1.1 /usr/lib64/libcrypto.so.1.1

echo "/usr/local/lib/" >> /etc/ld.so.conf
ldconfig
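
To confirm the new OpenSSL build is being picked up (a minimal check, assuming the paths used above):

# Should report 1.1.1i
/usr/local/openssl/bin/openssl version
# keepalived should link against the new 1.1 libraries
ldd /usr/sbin/keepalived | grep -E "libssl|libcrypto"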

# Test load-balancer failover: check which node currently holds the VIP
ip a
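
A simple failover test, assuming the VIP 192.167.14.205 and the services configured above: find the node holding the VIP, stop haproxy there, and confirm the VIP moves to another master.

# On each master, see which one holds the VIP
ip a | grep 192.167.14.205
# On the VIP holder, stop haproxy; check_apiserver.sh will then stop keepalived and release the VIP
systemctl stop haproxy
# On the other masters, the VIP should appear within a few seconds
ip a | grep 192.167.14.205
# The HAProxy monitor endpoint should still answer through the VIP
curl -i http://192.167.14.205:33305/monitor
# Restore the original node afterwards
systemctl start haproxy && systemctl start keepalived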



Reference: https://blog.csdn.net/qq_40914472/article/details/141057235

Chapter 3: Deploying the Highly Available Kubernetes Cluster

3.1 Modifying the hosts file

# 1. Node plan
# Control-plane VIP
192.167.14.205

# etcd nodes
192.167.14.228   etcd
192.167.14.229   etcd
192.167.14.246   etcd

# Master nodes
192.167.14.119  haproxy+keepalived
192.167.14.223  haproxy+keepalived
192.167.14.226  haproxy+keepalived

# Worker nodes
192.167.14.228   
192.167.14.229   
192.167.14.246  

# Set hostnames
hostnamectl set-hostname master119
hostnamectl set-hostname master223
hostnamectl set-hostname master226

hostnamectl set-hostname node228
hostnamectl set-hostname node229
hostnamectl set-hostname node246

# 2. Run the following on all nodes: 228 229 246 119 223 226
cat >> /etc/hosts << EOF

127.0.0.1   localhost
::1         localhost

192.167.14.119 master119
192.167.14.223 master223
192.167.14.226 master226

192.167.14.228 node228
192.167.14.229 node229
192.167.14.246 node246
EOF
# Show all IPs of this host
hostname -I


# Configure DNS resolution
cat >> /etc/resolv.conf << EOF
nameserver 8.8.8.8
EOF

3.2 Disabling the firewall

# Disable the firewall
systemctl stop firewalld && systemctl disable firewalld

3.3 Disabling SELinux and swap

# Temporarily disable SELinux
setenforce 0
# Permanently disable SELinux
sed -i 's/^SELINUX=enforcing$/SELINUX=disabled/' /etc/selinux/config
# Temporarily disable all swap
swapoff -a
# Permanently disable all swap
sed -i '/swap/s/^\(.*\)$/#\1/g' /etc/fstab

3.4 Enabling bridge-nf-call-iptables

# Run on all nodes
cat <<EOF | sudo tee /etc/modules-load.d/k8s.conf
overlay
br_netfilter
EOF

sudo modprobe overlay
sudo modprobe br_netfilter

# Set the required sysctl parameters; they persist across reboots
cat <<EOF | sudo tee /etc/sysctl.d/k8s.conf
net.bridge.bridge-nf-call-iptables  = 1
net.bridge.bridge-nf-call-ip6tables = 1
net.ipv4.ip_forward                 = 1
EOF

# Apply the sysctl parameters without rebooting
sudo sysctl --system

# Confirm that the `br_netfilter` and `overlay` modules are loaded:

lsmod | grep br_netfilter
lsmod | grep overlay

3.5 Passing bridged IPv4 traffic to iptables chains

cat <<EOF >> /etc/sysctl.d/k8s.conf
net.ipv4.ip_forward = 1
net.bridge.bridge-nf-call-iptables = 1
net.bridge.bridge-nf-call-ip6tables = 1
fs.may_detach_mounts = 1
vm.overcommit_memory=1
vm.panic_on_oom=0
fs.inotify.max_user_watches=89100
fs.file-max=52706963
fs.nr_open=52706963
net.netfilter.nf_conntrack_max=2310720
net.ipv4.tcp_keepalive_time = 600
net.ipv4.tcp_keepalive_probes = 3
net.ipv4.tcp_keepalive_intvl =15
net.ipv4.tcp_max_tw_buckets = 36000
net.ipv4.tcp_tw_reuse = 1
net.ipv4.tcp_max_orphans = 327680
net.ipv4.tcp_orphan_retries = 3
net.ipv4.tcp_syncookies = 1
net.ipv4.tcp_max_syn_backlog = 16384
net.ipv4.ip_conntrack_max = 65536
net.ipv4.tcp_timestamps = 0
net.core.somaxconn = 16384
EOF

# Apply the settings
sysctl --system
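
To spot-check that the values are active (optional):

# Print a few of the keys set above
sysctl net.bridge.bridge-nf-call-iptables net.ipv4.ip_forward net.core.somaxconn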

3.6 Time synchronization


# 1. On the NTP server node: check whether ntp is installed
rpm -qa | grep ntp
# If not installed, download and install it
https://mirrors.aliyun.com/centos/7.9.2009/os/x86_64/Packages/ntp-4.2.6p5-29.el7.centos.2.x86_64.rpm
rpm -ivh ntp-4.2.6p5-29.el7.centos.2.x86_64.rpm


# 2. Configuration file
# Back it up
cp /etc/ntp.conf{,.bak}

cat /etc/ntp.conf
 
 
# The effective configuration is as follows
driftfile /var/lib/ntp/drift
restrict default nomodify notrap nopeer noquery
restrict 127.0.0.1
restrict ::1
server 127.127.1.0 iburst      # added: use the local clock as the time source
fudge 127.127.1.0 stratum 10   # added: local clock stratum
includefile /etc/ntp/crypto/pw
keys /etc/ntp/keys
disable monitor

# 3. Manage the service
# Start the service
systemctl start ntpd

# Check status
systemctl status ntpd

# Restart the service
systemctl restart ntpd

# Enable at boot
systemctl enable ntpd
# Disable at boot
systemctl disable ntpd
# 4. Firewall
firewall-cmd --permanent --add-service=ntp
firewall-cmd --reload

# 5. Check synchronization status:
ntpq -p

# 6. On the client nodes

ntpdate 192.167.14.228
# Check the output to confirm that synchronization completed.

# Scheduled sync:
crontab -e
# Sync once per hour, at minute 0
0 * * * * /usr/sbin/ntpdate 192.167.14.228

# then save and quit with :wq

3.7 Deploying the crictl tool

# Upload to the three nodes 192.167.14.119, 192.167.14.246 and 192.167.14.248
crictl-v1.29.0-linux-amd64.tar.gz
# Extract
tar -xzvf crictl-v1.29.0-linux-amd64.tar.gz
# Copy the executable
cp crictl /usr/local/bin
# Configuration file
cat > /etc/crictl.yaml << EOF
runtime-endpoint: unix:///run/containerd/containerd.sock
EOF
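
With the endpoint configured, crictl should be able to talk to containerd; a quick check:

crictl --version
# Dump the runtime status reported through the CRI socket
crictl info | head -n 20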

3.8 Deploying runc

# Upload the binary to the three nodes 192.167.14.119, 192.167.14.246 and 192.167.14.248
runc.amd64
# Copy it into place
cp runc.amd64 /usr/local/bin/runc
# Make it executable
chmod +x /usr/local/bin/runc
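
A quick check that the binary is usable:

runc --version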

3.9 Importing images into containerd


# Upload the image tarballs to all nodes (119 223 226 228 229 246). The tarballs below are downloaded and prepared in an internet-connected environment and then transferred to the offline environment
etcd.tar
kube-apiserver.tar
kube-controller-manager.tar
kube-scheduler.tar
kube-proxy.tar
coredns.tar
pause.tar

ctl.tar
operator.tar
typha.tar
pod2daemon-flexvol.tar
csi.tar
node-driver-registrar.tar

cni.tar
node.tar
kube-controllers.tar
apiserver.tar
nfs-subdir-external-provisioner.tar


# Import
ctr -n k8s.io images import etcd.tar
ctr -n k8s.io images import kube-apiserver.tar
ctr -n k8s.io images import kube-controller-manager.tar
ctr -n k8s.io images import kube-scheduler.tar
ctr -n k8s.io images import kube-proxy.tar
ctr -n k8s.io images import coredns.tar
ctr -n k8s.io images import pause.tar

# Calico network plugin (image tags are under docker.io/calico/)
ctr -n k8s.io images import ctl.tar
ctr -n k8s.io images import operator.tar
ctr -n k8s.io images import typha.tar
ctr -n k8s.io images import pod2daemon-flexvol.tar
ctr -n k8s.io images import csi.tar
ctr -n k8s.io images import node-driver-registrar.tar

ctr -n k8s.io images import cni.tar
ctr -n k8s.io images import node.tar
ctr -n k8s.io images import kube-controllers.tar
ctr -n k8s.io images import apiserver.tar

# NFS storage
ctr -n k8s.io images import nfs-subdir-external-provisioner.tar


# Import application/service images

ctr -n k8s.io images import example-db-example.tar
ctr -n k8s.io images import example-gateway-example.tar
ctr -n k8s.io images import example-svc-example.tar
ctr -n k8s.io images import example-web-example.tar
ctr -n k8s.io images import example-cloud-eureka.tar
ctr -n k8s.io images import metrics-scraper.tar
ctr -n k8s.io images import metrics-server.tar
ctr -n k8s.io images import nfs-subdir-external-provisioner.tar

# Retag images; the following commands change the image tags
ctr -n k8s.io i tag k8s.gcr.io/csi:v3.26.4 docker.io/calico/csi:v3.26.4
ctr -n k8s.io i tag k8s.gcr.io/ctl:v3.26.4 docker.io/calico/ctl:v3.26.4
ctr -n k8s.io i tag k8s.gcr.io/node-driver-registrar:v3.26.4 docker.io/calico/node-driver-registrar:v3.26.4
ctr -n k8s.io i tag k8s.gcr.io/pod2daemon-flexvol:v3.26.4 docker.io/calico/pod2daemon-flexvol:v3.26.4
ctr -n k8s.io i tag k8s.gcr.io/apiserver:v3.26.4 docker.io/calico/apiserver:v3.26.4
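
After importing and retagging, you can confirm the images are visible both to ctr and to the CRI (kubelet) view:

ctr -n k8s.io images ls | grep -E "calico|pause|coredns"
crictl images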

3.10 Deploying the Kubernetes installation packages


# 1. Kubernetes installation packages: kubeadm, kubelet, kubectl (plus their dependencies)
a24e42254b5a14b67b58c4633d29c27370c28ed6796a80c455a65acc813ff374-kubectl-1.28.2-0.x86_64.rpm
cee73f8035d734e86f722f77f1bf4e7d643e78d36646fd000148deb8af98b61c-kubeadm-1.28.2-0.x86_64.rpm
conntrack-tools-1.4.4-7.el7.x86_64.rpm
e1cae938e231bffa3618f5934a096bd85372ee9b1293081f5682a22fe873add8-kubelet-1.28.2-0.x86_64.rpm
libnetfilter_cthelper-1.0.0-11.el7.x86_64.rpm
libnetfilter_cttimeout-1.0.0-7.el7.x86_64.rpm
libnetfilter_queue-1.0.2-2.el7_2.x86_64.rpm
socat-1.7.3.2-2.el7.x86_64.rpm

# Install

yum localinstall *.rpm
# Install ignoring dependency checks (optional)
rpm -ivh --nodeps *.rpm
# List the required images
kubeadm config images list
# The command above prints:
registry.k8s.io/kube-apiserver:v1.28.2
registry.k8s.io/kube-controller-manager:v1.28.2
registry.k8s.io/kube-scheduler:v1.28.2
registry.k8s.io/kube-proxy:v1.28.2
registry.k8s.io/pause:3.9
registry.k8s.io/etcd:3.5.9-0
registry.k8s.io/coredns/coredns:v1.10.1 

# Set the kubelet cgroup driver
# To keep the container runtime's cgroup driver (systemd) consistent with kubelet's, edit the following file

vim /etc/sysconfig/kubelet

KUBELET_EXTRA_ARGS="--cgroup-driver=systemd"

# Just enable kubelet at boot; no config has been generated yet, so it will start automatically once the cluster is initialized

systemctl enable kubelet  

3.11 Initializing the cluster


# Generate the initialization configuration file:

cat > kubeadm-config.yaml << EOF

apiVersion: kubeadm.k8s.io/v1beta3
kind: InitConfiguration
bootstrapTokens:
- groups:
  - system:bootstrappers:kubeadm:default-node-token
  token: abcdef.0123456789abcdef
  ttl: 24h0m0s
  usages:
  - signing
  - authentication
localAPIEndpoint:
  advertiseAddress: 192.167.14.226
  bindPort: 6443
nodeRegistration:
  criSocket: unix:///run/containerd/containerd.sock
  imagePullPolicy: IfNotPresent
  name: master226
  taints:
  - effect: NoSchedule
    key: node-role.kubernetes.io/master

---
apiVersion: kubeadm.k8s.io/v1beta3
kind: ClusterConfiguration
certificatesDir: /etc/kubernetes/pki
clusterName: kubernetes
controlPlaneEndpoint: "192.167.14.205:16443"
apiServer:
  certSANs:
  - master119
  - master223
  - master226
  - 192.167.14.119
  - 192.167.14.223
  - 192.167.14.226
  - 192.167.14.205
  - 127.0.0.1
  extraArgs:
    authorization-mode: Node,RBAC
  timeoutForControlPlane: 4m0s
controllerManager: {}
dns:
  imageRepository: k8s.gcr.io
  imageTag: v1.10.1
etcd:
  external:
    endpoints:
    - https://192.167.14.228:2379
    - https://192.167.14.229:2379
    - https://192.167.14.246:2379
    caFile: /opt/etcd/ssl/ca.pem
    certFile: /opt/etcd/ssl/etcd.pem
    keyFile: /opt/etcd/ssl/etcd-key.pem
imageRepository: k8s.gcr.io
kubernetesVersion: 1.28.2
networking:
  dnsDomain: cluster.local
  serviceSubnet: 10.96.0.0/12
  podSubnet: 10.244.0.0/16
scheduler: {}

---
apiVersion: kubeproxy.config.k8s.io/v1alpha1
kind: KubeProxyConfiguration
mode: ipvs

---
apiVersion: kubelet.config.k8s.io/v1beta1
kind: KubeletConfiguration
cgroupDriver: systemd
maxPods: 200
maxOpenFiles: 1000000
serializeImagePulls: false
allowedUnsafeSysctls:
- 'kernel.msg*'
- 'net.core.somaxconn'
- 'net.ipv4.ip_local_port_range'
- 'net.ipv4.tcp_syncookies'
- 'net.ipv4.conf.all.forwarding'


EOF

# List the images required by this configuration
kubeadm config images list --config kubeadm-config.yaml

# Initialize the Kubernetes cluster with kubeadm

kubeadm init --config kubeadm-config.yaml 

# Reset (undo the initialization if you need to start over)
kubeadm reset



kubectl -n kube-system get cm kubeadm-config -o yaml

3.12 Joining the other master and worker nodes




# Run the following on master226

  mkdir -p $HOME/.kube
  sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
  sudo chown $(id -u):$(id -g) $HOME/.kube/config

# Or use the admin config directly

  export KUBECONFIG=/etc/kubernetes/admin.conf
  
# Run on master119 and master223
 
mkdir -p /etc/kubernetes/pki/
# On master226, copy the certificates to master119 and master223

cd /etc/kubernetes/pki/
scp -r * root@192.167.14.119:/etc/kubernetes/pki
scp -r * root@192.167.14.223:/etc/kubernetes/pki



# Copy admin.conf
cd /etc/kubernetes/

scp admin.conf root@192.167.14.119:/etc/kubernetes/

scp admin.conf root@192.167.14.223:/etc/kubernetes/




 

## Example: run the control-plane join command printed by kubeadm init on the other master nodes:

kubeadm join 192.168.52.88:16443 --token abcdef.0123456789abcdef \

      --discovery-token-ca-cert-hash sha256:810417c282289b90e8fd6cf5b7be1d5a9994a4408494a1e77bd34bde41edcd04 \

      --control-plane

 

# Copy the kubeconfig that kubectl uses to its default path:

mkdir -p $HOME/.kube

sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config

sudo chown $(id -u):$(id -g) $HOME/.kube/config



# Add a default gateway route; this temporary route is lost after a reboot and must be persisted

route add default gw 192.167.14.1

route -n
# Delete the route (if needed)
route del default gw 192.167.14.1



# Join the other master nodes to the cluster

  kubeadm join 192.167.14.205:16443 --token abcdef.0123456789abcdef \
        --discovery-token-ca-cert-hash sha256:5d2a8da728960cc764109e81fb70c3d2a7c5f455bcd6988a649a57205fd1930a \
        --control-plane 

# Join the worker nodes to the cluster

kubeadm join 192.167.14.205:16443 --token abcdef.0123456789abcdef \
        --discovery-token-ca-cert-hash sha256:5d2a8da728960cc764109e81fb70c3d2a7c5f455bcd6988a649a57205fd1930a 
        
        
        
        
# To add a new master later, run this on an existing master to print the join command
kubeadm token create --print-join-command
# Run the printed command on the joining node, appending --control-plane (example from a different environment):
kubeadm join 172.17.68.66:8443 --token a930ez.69wo7j00zeu1gghq --discovery-token-ca-cert-hash sha256:2240cd1f54550c5a4a57b3d3a8671bca10fa6 --control-plane --cri-socket unix:///var/run/cri-dockerd.sock



# On the newly joined nodes as well, keep the runtime's cgroup driver consistent with kubelet's by editing the following file

vim /etc/sysconfig/kubelet

KUBELET_EXTRA_ARGS="--cgroup-driver=systemd"

# Just enable kubelet at boot; it will start automatically once the node joins the cluster

systemctl enable kubelet  

# Label the worker nodes so their ROLES column shows worker

kubectl label node node228 node-role.kubernetes.io/worker=worker
kubectl label node node229 node-role.kubernetes.io/worker=worker
kubectl label node node246 node-role.kubernetes.io/worker=worker

# Remove the role label
kubectl label nodes node228 node-role.kubernetes.io/worker-

kubectl get nodes

3.13 Installing the Calico network plugin


# Bring a NIC down and delete it (only if a leftover interface needs cleaning up)
ip link set ens4f1 down

ip link delete <interface>

# Check routes
route -n
ip route show
route add default gw 192.167.14.1

# Apply the Calico manifests
kubectl create -f tigera-operator.yaml

kubectl create -f custom-resources.yaml  
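
After applying the manifests, the tigera operator creates the calico-system namespace; you can watch the rollout and confirm the nodes go Ready (press Ctrl-C to stop watching):

kubectl get pods -n tigera-operator
kubectl get pods -n calico-system -w
kubectl get nodes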

# Drop the page cache (optional)
echo 3 > /proc/sys/vm/drop_caches

Chapter 4: Kuboard, the Kubernetes Management UI

4.1 Installing Kuboard

4.1.1 Downloading the image

# In an internet-connected environment, pull the image (requires Docker 19.03+ and Kubernetes v1.13+)
docker pull swr.cn-east-2.myhuaweicloud.com/kuboard/kuboard:v3
# Save it as a local tarball
docker save -o kuboard3.tar swr.cn-east-2.myhuaweicloud.com/kuboard/kuboard:v3

4.1.2 Installing Kuboard

# Upload kuboard3.tar to the offline environment and load the image into docker
docker load < kuboard3.tar
# Start Kuboard (KUBOARD_ENDPOINT is the k8s cluster api-server address)

docker run -d \
  --restart=unless-stopped \
  --name=kuboard \
  -p 30080:80/tcp \
  -p 10081:10081/tcp \
  -e KUBOARD_ENDPOINT="http://192.167.14.205:16443" \
  -e KUBOARD_AGENT_SERVER_TCP_PORT="10081" \
  -v /root/kuboard-data:/data \
  swr.cn-east-2.myhuaweicloud.com/kuboard/kuboard:v3
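
To confirm the container is up, then open the UI on the mapped port (30080 above):

docker ps | grep kuboard
# UI: http://<host-ip>:30080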

4.1.3 Logging in to Kuboard

# Default login account
admin / password: Kuboard123

4.2 Using Kuboard

4.2.1 Importing the k8s cluster

4.2.2 Viewing nodes

4.2.3 Installing the Kuboard suite

# Connectivity test (these probes are not reachable)
curl -ik https://192.167.14.119:10257
curl -ik https://192.167.14.119:10259

curl -ik https://192.167.14.223:10257
curl -ik https://192.167.14.223:10259

curl -ik https://192.167.14.226:10257
curl -ik https://192.167.14.226:10259


Chapter 5: Kubernetes Troubleshooting

5.1 Troubleshooting approach

Step 1: Check node status

kubectl get nodes
# View node details
kubectl describe node <node-name>

Step 2: View events

kubectl get events

Step 3: Check node resources

kubectl describe node <node-name> | grep Allocated -A 5

Step 4: Network troubleshooting

kubectl get pods -n kube-system

Step 5: Check the container runtime status

# With a Docker runtime:
docker ps
docker logs <container-id>
# With containerd (as used in this cluster), use crictl instead:
crictl ps
crictl logs <container-id>

Step 6: Check the kubelet service status

systemctl status kubelet
sudo systemctl restart kubelet

Step 7: Restart the faulty node

Provided production workloads are not affected, you can try draining and removing the faulty node before restarting it. Use the following commands:
kubectl drain <node-name> --ignore-daemonsets
kubectl delete node <node-name>

5.2 Problem categories

1. Check component status

kubectl get cs

2. Check where the system pods are running

kubectl get pod -o wide -n kube-system

3. Describe a pod in detail

kubectl describe pods kube-apiserver-master119 -n kube-system

4. ContainerCreating

# 1. Caused by network problems
# 2. Caused by storage problems
# List all pods
kubectl get pod -o wide -n kube-system
kubectl get pods -A
# Describe the pod in detail
kubectl describe pods coredns-7b56cf8b67-zgg44  -n kube-system

# Print kubelet error logs
journalctl -u kubelet -n 10 | grep -i error

# Remove a node from the cluster
kubectl drain master119 --ignore-daemonsets --delete-emptydir-data
kubectl delete node master119

# Force-delete a pod
kubectl delete pod coredns-7b56cf8b67-zgg44 --force -n kube-system

5. CrashLoopBackOff

# CrashLoopBackOff means the container did start but then exited abnormally. The Pod's restart count is usually greater than 0; start by checking the container logs

kubectl describe pod <pod-name>
kubectl logs <pod-name>
kubectl logs --previous <pod-name>

# Delete the pod (or all pods in a namespace)
kubectl delete pod <pod-name> -n <namespace>


kubectl delete pods --all -n <namespace>

# View the CoreDNS configuration
 kubectl get configmap coredns -n kube-system -o yaml
 
# Restart CoreDNS
kubectl rollout restart deployment/coredns -n kube-system

6. ImagePullBackOff

# This is usually caused by a wrong image name or a misconfigured private-registry secret. You can verify the image can be pulled with docker pull <image> (or crictl pull <image> when using containerd).


kubectl get pod -o wide -n calico-system

kubectl get pod -o wide -n kube-system

kubectl get pod -o wide -n calico-apiserver





kubectl describe pod coredns-b4569b87b-k8hxm  -n calico-system


kubectl describe pod calico-apiserver-59c8f7c46b-dk9tv  -n calico-apiserver



kubectl describe pod coredns-7b56cf8b67-b9qr7   -n kube-system

kubectl logs  coredns-b4569b87b-k8hxm  -n  kube-system

kubectl delete pod coredns-b4569b87b-8pjmc  -n  kube-system

# View the CoreDNS config
kubectl get configmap coredns -n kube-system -o yaml

# List all namespaces
kubectl get ns
# List all pods across namespaces
kubectl get pods --all-namespaces -o wide