Arm主机部署Kubernetes单节点集群

82 阅读4分钟

前提

在mac pro (m3)上的虚拟机,ubuntu-22.04.5-live-server-arm64

由于某些原因,要安装1.23.10版本的kubernetes并使用docker(不是containerd)

root@ubuntu:~# uname -a
Linux ubuntu 5.15.0-142-generic #152-Ubuntu SMP Mon May 19 11:06:29 UTC 2025 aarch64 aarch64 aarch64 GNU/Linux
root@ubuntu:~# uname -m
aarch64
root@ubuntu:~# lsb_release -a
No LSB modules are available.
Distributor ID:	Ubuntu
Description:	Ubuntu 22.04.5 LTS
Release:	22.04
Codename:	jammy

查看cgroup版本

cgroup2

root@ubuntu:~# cat /proc/mounts | grep cgroup
cgroup2 /sys/fs/cgroup cgroup2 rw,nosuid,nodev,noexec,relatime,nsdelegate,memory_recursiveprot 0 0

关闭swap分区

永久关闭:修改 /etc/fstab 注释掉swap.img开头的那一行 临时关闭:swapoff -a

关闭SELinux

getenforce # 查看状态,没有安装的话就不用管了

关闭防火墙

ufw status # 若终端输出 “Status: inactive”,则表示防火墙已成功关闭且不会在开机时自动启动。

统一时区、时间

以后可能添加节点进来

timedatectl set-timezone Asia/Shanghai
date 
# systemctl restart systemd-timesyncd.service # 不对的话要重启时间同步服务

开启流量转发

# Load br_netfilter so bridged pod traffic is visible to iptables.
# kubeadm's preflight checks require bridge-nf-call-iptables=1.
cat <<EOF | tee /etc/modules-load.d/k8s.conf
br_netfilter
EOF
modprobe br_netfilter

cat <<EOF | tee /etc/sysctl.d/k8s.conf
net.bridge.bridge-nf-call-iptables=1
net.bridge.bridge-nf-call-ip6tables=1
net.ipv4.ip_forward=1
EOF
# Apply all sysctl settings
sysctl --system
# Verify forwarding is enabled
sysctl net.ipv4.ip_forward

固定主机IP,不要dhcp

以后可能添加节点进来

Netplan 配置文件通常位于/etc/netplan/目录下。当前主机下的文件内容:

50-cloud-init.yaml

# This file is generated from information provided by the datasource.  Changes
# to it will not persist across an instance reboot.  To disable cloud-init's
# network configuration capabilities, write a file
# /etc/cloud/cloud.cfg.d/99-disable-network-config.cfg with the following:
# network: {config: disabled}
network:
    ethernets:
        enp0s5:
            dhcp4: true
    version: 2

因此要创建文件 /etc/cloud/cloud.cfg.d/99-disable-network-config.cfg, 文件内容为network: {config: disabled}

修改 50-cloud-init.yaml为如下的

network:
  ethernets:
    enp0s5: # interface name as reported by cloud-init
      dhcp4: false # must be false so the static address below takes effect
      addresses:
        - 192.168.2.84/24 # the static IP to pin
      routes: # default gateway
        - to: default
          via: 192.168.2.1
      nameservers:
        addresses:
        - 223.5.5.5 # AliDNS
        - 8.8.8.8   # Google DNS
  version: 2

如果修改了netplan文件,要应用一下

netplan apply

安装docker

  • 下载安装
# Download the static arm64 Docker build and install the binaries.
# Version kept in a variable so an upgrade only touches one line.
DOCKER_VERSION="20.10.20"
wget "https://download.docker.com/linux/static/stable/aarch64/docker-${DOCKER_VERSION}.tgz"
tar -zxvf "docker-${DOCKER_VERSION}.tgz"
cp docker/* /usr/bin/
  • systemd启动

添加 /etc/systemd/system/docker.service 文件,内容如下

[Unit]
Description=Docker Application Container Engine
Documentation=https://docs.docker.com
# Fixed: the firewall unit is named firewalld.service, not firewall.service.
After=network-online.target firewalld.service
Wants=network-online.target
[Service]
Type=notify
# The default is not to use systemd for cgroups because the delegate issues
# still exist and systemd currently does not support the cgroup feature set
# required for containers run by docker.
ExecStart=/usr/bin/dockerd
ExecReload=/bin/kill -s HUP $MAINPID
# Having non-zero Limit*s causes performance problems due to accounting
# overhead in the kernel. We recommend using cgroups to do container-local
# accounting.
LimitNOFILE=infinity
LimitNPROC=infinity
LimitCORE=infinity
# Uncomment TasksMax if your systemd version supports it.
# Only systemd 226 and above support this directive.
#TasksMax=infinity
TimeoutStartSec=0
# Set Delegate=yes so that systemd does not reset the cgroups of docker containers.
Delegate=yes
# Kill only the docker process, not all processes in the cgroup.
KillMode=process
# Restart the docker process if it exits prematurely.
Restart=on-failure
StartLimitBurst=3
StartLimitInterval=60s
[Install]
WantedBy=multi-user.target
  • 修改docker的cgroup驱动为systemd,因为kubelet的默认是systemd,需要保持一致

在/etc/docker/daemon.json中添加

{
  "exec-opts": ["native.cgroupdriver=systemd"]
}
  • 启动docker
# NOTE: do NOT chmod +x the unit file — systemd unit files are plain
# configuration, and systemd warns about (and may ignore) executable units.
systemctl daemon-reload
systemctl enable docker.service
systemctl start docker.service
systemctl status docker.service

docker -v

安装kubeadm、kubelet、kubectl

这里跟官网的步骤大致相同

# Target cluster version
VERSION=v1.23.10
ARCH="arm64"

# Install the standard CNI plugins (required by flannel and most add-ons).
CNI_PLUGINS_VERSION="v1.3.0"
DEST="/opt/cni/bin"
mkdir -p "$DEST"
curl -L "https://github.com/containernetworking/plugins/releases/download/${CNI_PLUGINS_VERSION}/cni-plugins-linux-${ARCH}-${CNI_PLUGINS_VERSION}.tgz" | tar -C "$DEST" -xz
ls "$DEST"

DOWNLOAD_DIR="/usr/local/bin"
mkdir -p "$DOWNLOAD_DIR"


# Install crictl — unlike the upstream guide, this step is mandatory here.
CRICTL_VERSION="v1.23.0"
wget "https://github.com/kubernetes-sigs/cri-tools/releases/download/${CRICTL_VERSION}/crictl-${CRICTL_VERSION}-linux-${ARCH}.tar.gz"
# Reuse the version/arch variables instead of a hard-coded file name,
# so bumping CRICTL_VERSION above is enough.
tar -xzf "crictl-${CRICTL_VERSION}-linux-${ARCH}.tar.gz"
mv crictl "${DOWNLOAD_DIR}/crictl"

cd "$DOWNLOAD_DIR"

# Download kubeadm / kubelet / kubectl in one request
# (curl expands the {…} URL glob itself with --remote-name-all).
curl -L --remote-name-all "https://dl.k8s.io/release/${VERSION}/bin/linux/${ARCH}/{kubeadm,kubelet,kubectl}"
chmod +x kubeadm kubelet kubectl

# Install the kubelet systemd service and the kubeadm drop-in,
# rewriting /usr/bin to our install dir.
RELEASE_VERSION="v0.16.2"
curl -sSL "https://raw.githubusercontent.com/kubernetes/release/${RELEASE_VERSION}/cmd/krel/templates/latest/kubelet/kubelet.service" | sed "s:/usr/bin:${DOWNLOAD_DIR}:g" | tee /usr/lib/systemd/system/kubelet.service

mkdir -p /usr/lib/systemd/system/kubelet.service.d

curl -sSL "https://raw.githubusercontent.com/kubernetes/release/${RELEASE_VERSION}/cmd/krel/templates/latest/kubeadm/10-kubeadm.conf" | sed "s:/usr/bin:${DOWNLOAD_DIR}:g" | tee /usr/lib/systemd/system/kubelet.service.d/10-kubeadm.conf

# Enable and start kubelet. It will crash-loop until `kubeadm init`
# creates its config — that is expected at this point.
systemctl enable --now kubelet

准备master的配置等

  • 确认节点的ip: 192.168.2.84

  • 配置一下主机别名

修改/etc/hosts, 添加

192.168.2.84 master0

  • 生成kubeadm.yaml文件

# No flag needed to print defaults; the original "--kubeconfig ClusterConfiguration"
# misused --kubeconfig (it expects a kubeconfig file path and is ignored here).
kubeadm config print init-defaults > kubeadm.yml

这里面信息都很有用的,可以重点看一下

apiVersion: kubeadm.k8s.io/v1beta3
bootstrapTokens:
- groups:
  - system:bootstrappers:kubeadm:default-node-token
  token: abcdef.0123456789abcdef
  ttl: 24h0m0s
  usages:
  - signing
  - authentication
kind: InitConfiguration
localAPIEndpoint:
  advertiseAddress: 192.168.2.84 # the master node's IP
  bindPort: 6443
nodeRegistration:
  criSocket: /var/run/dockershim.sock # docker runtime; containerd uses a different socket path
  imagePullPolicy: IfNotPresent
  name: master0 # master alias, configured in /etc/hosts
  taints: null
---
apiServer:
  timeoutForControlPlane: 4m0s
apiVersion: kubeadm.k8s.io/v1beta3
certificatesDir: /etc/kubernetes/pki
clusterName: kubernetes
controllerManager: {}
dns: {}
etcd:
  local:
    dataDir: /var/lib/etcd
# imageRepository: k8s.gcr.io
# replaced with a mirror reachable from mainland China
imageRepository: registry.aliyuncs.com/google_containers
kind: ClusterConfiguration
kubernetesVersion: 1.23.10 # must match the $VERSION used during install
networking:
  podSubnet: 10.244.0.0/16     # Pod network
  dnsDomain: cluster.local
  serviceSubnet: 10.245.0.0/16 # Service network
scheduler: {}

podSubnet要与flannel的Network保持一致;podSubnet和serviceSubnet要完全隔离。10.244.0.0/16是flannel的默认值。

预先拉取镜像

# List the images required by the config, then actually pre-pull them
# (the heading promises a pre-pull; `images list` alone downloads nothing).
kubeadm config images list --config kubeadm.yml
kubeadm config images pull --config kubeadm.yml

处理依赖问题

临时的解决方法如下

# -p makes this idempotent: re-running after the directory exists no longer fails.
mkdir -p /sys/fs/cgroup/systemd
mount -t cgroup -o none,name=systemd cgroup /sys/fs/cgroup/systemd

彻底的解决方法:todo

  • conntrack
# -y keeps the install non-interactive (needed when scripting the setup).
apt install -y conntrack

初始化master

kubeadm init --config=kubeadm.yml --upload-certs | tee kubeadm-init.log

成功了,输出如下

[bootstrap-token] Creating the "cluster-info" ConfigMap in the "kube-public" namespace
[kubelet-finalize] Updating "/etc/kubernetes/kubelet.conf" to point to a rotatable kubelet client certificate and key
[addons] Applied essential addon: CoreDNS
[addons] Applied essential addon: kube-proxy

Your Kubernetes control-plane has initialized successfully!

To start using your cluster, you need to run the following as a regular user:

  mkdir -p $HOME/.kube
  sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
  sudo chown $(id -u):$(id -g) $HOME/.kube/config

Alternatively, if you are the root user, you can run:

  export KUBECONFIG=/etc/kubernetes/admin.conf

You should now deploy a pod network to the cluster.
Run "kubectl apply -f [podnetwork].yaml" with one of the options listed at:
  https://kubernetes.io/docs/concepts/cluster-administration/addons/

Then you can join any number of worker nodes by running the following on each as root:

kubeadm join 192.168.2.84:6443 --token abcdef.0123456789abcdef \
	--discovery-token-ca-cert-hash sha256:50baf766726b7e86b6ee1540b0a7c6b545ae287ac26eb7aba7b00dbe64edaae8
root@ubuntu:~/k8#

配置kubeconfig

# Copy the admin kubeconfig for kubectl; quote all expansions so the
# commands survive a $HOME containing spaces.
mkdir -p "$HOME/.kube"
cp -i /etc/kubernetes/admin.conf "$HOME/.kube/config"
chown "$(id -u):$(id -g)" "$HOME/.kube/config"

安装flannel

下载flannel的配置

wget https://raw.githubusercontent.com/coreos/flannel/master/Documentation/kube-flannel.yml

注意到flannel的Network和上面是一致的

  net-conf.json: |
    {
      "Network": "10.244.0.0/16",
      "EnableNFTables": false,
      "Backend": {
        "Type": "vxlan"
      }
    }

部署flannel需要两个镜像:ghcr.io/flannel-io/flannel-cni-plugin:v1.7.1-flannel1 和 ghcr.io/flannel-io/flannel:v0.27.0。将这两个镜像替换成国内的源,这里用南京大学的:把 ghcr.io 替换为 ghcr.nju.edu.cn 即可

部署

kubectl apply -f kube-flannel.yml

允许master上调度POD

kubectl taint nodes --all node-role.kubernetes.io/master-

创建工作负载测试

apiVersion: apps/v1
kind: Deployment
metadata:
  name: nginx-deployment
  labels:
    app: nginx
spec:
  replicas: 1
  selector:
    matchLabels:
      app: nginx
  template:
    metadata:
      labels:
        app: nginx
    spec:
      containers:
      - name: nginx
        image: swr.cn-north-4.myhuaweicloud.com/ddn-k8s/docker.io/nginx:stable-linuxarm64 # must be an arm64 nginx image
        ports:
        - containerPort: 80
---
apiVersion: v1
kind: Service
metadata:
  name: nginx-svc
  labels:
    name: nginx-svc
spec:
  type: NodePort # exposes the service on a port of every node
  ports:
  - port: 80         
    targetPort: 80  
    protocol: TCP
  selector:
    app: nginx         

TODO

1. 切回cgroup v1。cgroup mountpoint does not exist的问题 。一旦重启可能会切换回v2

2. 延长证书期限或证书自动更换