由于我之前的环境是 docker 环境，但 docker 作为运行时已不具备优势，所以准备将我的环境切换为 containerd。

我之前的 k8s 使用的是 1.23. 目前切到最新 1.25.3

由于存在 containerd，crictl，k8s 等多个主要组件的切换，出现了一些小问题。

主要是 containerd， containerd 的客户端和服务端版本竟然不一致，所以单独 apt remove containerd，然后重装。

然后就是 cgroup 和 crictl sock, 以及 pause 拉取的问题


root@empty:~# cat /etc/crictl.yaml
runtime-endpoint: "unix:///var/run/containerd/containerd.sock"
image-endpoint: "unix:///var/run/containerd/containerd.sock"
timeout: 10
debug: false
pull-image-on-create: false
disable-pull-on-run: false


root@empty:~# containerd config default > /tmp/config.toml
root@empty:~#
root@empty:~#
root@empty:~# diff /tmp/config.toml /etc/containerd/config.toml
61c61
<     sandbox_image = "registry.k8s.io/pause:3.6"
---
>     sandbox_image = "registry.cn-beijing.aliyuncs.com/kubesphereio/pause:3.8"
125c125
<             SystemdCgroup = false
---
>             SystemdCgroup = true
root@empty:~#

重启 containerd 并 enable 该服务设置开机启动

我的集群创建模版



 kk-ovn.yaml

apiVersion: kubekey.kubesphere.io/v1alpha2
kind: Cluster
metadata:
  name: empty
spec:
  hosts:
  - {name: empty, address: <node eth0 ip>, internalAddress: <node eth0 ip>, user: xxx, password: "xxxx"}
  roleGroups:
    etcd:
    - empty
    control-plane:
    - empty
    worker:
    - empty
  controlPlaneEndpoint:
    ## Internal loadbalancer for apiservers
    # internalLoadbalancer: haproxy
    domain: lb.kubesphere.local
    address: ""
    port: 6443
  kubernetes:
    containerManager: containerd
    version: v1.25.3
    clusterName: cluster.local
    autoRenewCerts: true
  system:
    # The ntp servers of chrony.
    ntpServers:
      - time1.cloud.tencent.com
      - ntp.aliyun.com
      - node1 # Set the node name in `hosts` as ntp server if no public ntp servers access.
    timezone: "Asia/Shanghai"
    # Specify additional packages to be installed. The ISO file which is contained in the artifact is required.
    rpms:
      - nfs-utils
    # Specify additional packages to be installed. The ISO file which is contained in the artifact is required.
    debs:
      - nfs-common
  etcd:
    type: kubekey
  network:
    plugin: none
    multusCNI:
      enabled: true
    kubePodsCIDR: 10.16.0.0/16
    kubeServiceCIDR: 10.96.0.0/12
  registry:
    privateRegistry: ""
    namespaceOverride: ""
    registryMirrors: []
    insecureRegistries: []
  addons: []




# cat /etc/sysctl.d/k8s.conf
net.ipv4.ip_forward = 1
net.bridge.bridge-nf-call-ip6tables = 1
net.bridge.bridge-nf-call-iptables = 1

net.ipv4.tcp_keepalive_time=600 #此参数表示TCP发送keepalive探测消息的间隔时间(秒)
net.ipv4.tcp_keepalive_intvl=30 #tcp检查间隔时间（keepalive探测包的发送间隔）
net.ipv4.tcp_keepalive_probes=10  #tcp检查次数（如果对方不予应答，探测包的发送次数）
net.ipv6.conf.all.disable_ipv6=1 #禁用IPv6，修为0为启用IPv6
net.ipv6.conf.default.disable_ipv6=1 #禁用IPv6，修为0为启用IPv6
net.ipv6.conf.lo.disable_ipv6=1 #禁用IPv6，修为0为启用IPv6
net.ipv4.neigh.default.gc_stale_time=120 #ARP缓存条目超时
net.ipv4.conf.all.rp_filter=0  #默认为1，系统会严格校验数据包的反向路径，可能导致丢包
net.ipv4.conf.default.rp_filter=0 #不开启源地址校验
net.ipv4.conf.default.arp_announce=2 #始终使用与目的IP地址对应的最佳本地IP地址作为ARP请求的源IP地址
net.ipv4.conf.lo.arp_announce=2 #始终使用与目的IP地址对应的最佳本地IP地址作为ARP请求的源IP地址
net.ipv4.conf.all.arp_announce=2 #始终使用与目的IP地址对应的最佳本地IP地址作为ARP请求的源IP地址
net.ipv4.ip_local_port_range= 45001 65000 # 定义网络连接可用作其源（本地）端口的最小和最大端口的限制，同时适用于TCP和UDP连接。
net.ipv4.ip_forward=1 # 其值为0,说明禁止进行IP转发；如果是1,则说明IP转发功能已经打开。
net.ipv4.tcp_max_tw_buckets=6000 #配置服务器 TIME_WAIT 数量
net.ipv4.tcp_syncookies=1 #此参数应该设置为1，防止SYN Flood
net.ipv4.tcp_synack_retries=2 #表示回应第二个握手包（SYN+ACK包）给客户端IP后，如果收不到第三次握手包（ACK包），进行重试的次数（默认为5）
net.bridge.bridge-nf-call-ip6tables=1 # 是否在ip6tables链中过滤IPv6包
net.bridge.bridge-nf-call-iptables=1 # 二层的网桥在转发包时也会被iptables的FORWARD规则所过滤，这样有时会出现L3层的iptables rules去过滤L2的帧的问题
net.netfilter.nf_conntrack_max=2310720 #连接跟踪表的大小，建议根据内存计算该值CONNTRACK_MAX = RAMSIZE (in bytes) / 16384 / (x / 32)，并满足nf_conntrack_max=4*nf_conntrack_buckets，默认262144

net.ipv6.neigh.default.gc_thresh1=8192
net.ipv6.neigh.default.gc_thresh2=32768
net.ipv6.neigh.default.gc_thresh3=65536

#gc_thresh3 是表大小的绝对限制
#gc_thresh2 设置为等于系统的最大预期邻居条目数的值
#在这种情况下，gc_thresh3 应该设置为一个比 gc_thresh2 值高的值，例如，比 gc_thresh2 高 25%-50%，将其视为浪涌容量。
#gc_thresh1 提高到较大的值；此设置的作用是，如果表包含的条目少于 gc_thresh1，内核将永远不会删除（超时）过时的条目。

net.core.netdev_max_backlog=16384 # 每CPU网络设备积压队列长度
net.core.rmem_max = 16777216 # 所有协议类型读写的缓存区大小
net.core.wmem_max = 16777216 # 最大的TCP数据发送窗口大小
net.ipv4.tcp_max_syn_backlog = 8096 # 第一个积压队列长度
net.core.somaxconn = 32768 # 第二个积压队列长度
fs.inotify.max_user_instances=8192 # 表示每一个real user ID可创建的inotify instatnces的数量上限，默认128.
fs.inotify.max_user_watches=524288 # 同一用户同时可以添加的watch数目，默认8192。
fs.file-max=52706963 # 文件描述符的最大值
fs.nr_open=52706963 #设置最大微博号打开数
kernel.pid_max = 4194303 #最大进程数
net.bridge.bridge-nf-call-arptables=1 #是否在arptables的FORWARD中过滤网桥的ARP包
vm.swappiness=0 # 禁止使用 swap 空间，只有当系统 OOM 时才允许使用它
vm.overcommit_memory=1 # 不检查物理内存是否够用
vm.panic_on_oom=0 # 开启 OOM
vm.max_map_count = 262144

集群重新部署挺快的，不到一分钟能拉起一个 all in one的环境

docker vs containerd

我之前的 docker 环境 pod 是没有使用网络 ns的，部署为 containerd 后， pod 在 ns 内

kubekey install with containerd

docker vs containerd