Linux 容器运行时(runc/containerd)原理浅析

117 阅读7分钟

1. 容器运行时基础概念

1.1 什么是容器运行时

容器运行时是在操作系统层面提供容器执行环境的软件组件。它负责管理容器的整个生命周期,包括创建、启动、停止和删除容器。在Linux环境中,容器运行时利用内核的命名空间、cgroups、capabilities等特性来提供隔离的执行环境。

1.2 容器运行时架构演进

graph TD
    A[Docker 1.10及之前] --> B[Docker 1.11及之后]
    B --> C[Kubernetes CRI标准]
    
    subgraph A1 [早期架构]
        A2[Docker Daemon] --> A3[runc]
    end
    
    subgraph B1 [解耦架构]
        B2[Docker Daemon] --> B3[containerd] --> B4[runc]
    end
    
    subgraph C1 [标准化架构]
        C2[kubelet] --> C3[CRI] --> C4[containerd] --> C5[runc]
    end

2. 环境准备与依赖安装

2.1 系统要求检查

创建系统检查脚本:

文件名:check_system.sh

#!/bin/bash
#
# check_system.sh - report whether this host exposes the kernel features a
# container runtime depends on. The script is read-only: it prints what it
# finds and changes nothing on the system.

echo "=== 系统环境检查 ==="

# Kernel release
echo "1. 检查内核版本:"
uname -r
echo ""

# Distribution details
echo "2. 检查操作系统:"
cat /etc/os-release
echo ""

# cgroup hierarchy
echo "3. 检查cgroup支持:"
if [ -d /sys/fs/cgroup ]; then
    echo "cgroup文件系统已挂载"
    ls /sys/fs/cgroup
else
    echo "警告: cgroup文件系统未找到"
fi
echo ""

# Namespace support.
# Probe our own process rather than PID 1: dereferencing /proc/1/ns/* is
# subject to a ptrace access check, so an unprivileged user would get a
# false "not enabled" result even on a kernel with namespaces.
echo "4. 检查命名空间支持:"
if [ -e /proc/self/ns/mnt ]; then
    echo "命名空间支持已启用"
else
    echo "错误: 命名空间支持未启用"
fi
echo ""

# Device mapper userspace tool
echo "5. 检查设备映射器:"
if command -v dmsetup >/dev/null 2>&1; then
    echo "设备映射器已安装"
    dmsetup version
else
    echo "警告: 设备映射器未安装"
fi
echo ""

echo "=== 环境检查完成 ==="

运行系统检查:

# Mark the script as executable, then run it from the current directory.
chmod +x check_system.sh
./check_system.sh

2.2 安装必要依赖

文件名:install_dependencies.sh

#!/bin/bash
#
# install_dependencies.sh - install the packages and the Go toolchain needed
# to build runc from source on an apt-based system.
# Requires: sudo rights and network access.

set -euo pipefail

readonly GO_TARBALL="go1.19.linux-amd64.tar.gz"

# Append a line to ~/.bashrc unless that exact line is already present, so
# running this script repeatedly does not pile up duplicate exports.
# Arguments: $1 - the line to add
add_to_bashrc() {
    grep -qxF -- "$1" ~/.bashrc 2>/dev/null || printf '%s\n' "$1" >> ~/.bashrc
}

echo "开始安装容器运行时依赖..."

# Refresh the package index
sudo apt-get update

# Base packages; wget is listed because the Go download below uses it
sudo apt-get install -y \
    apt-transport-https \
    ca-certificates \
    curl \
    gnupg \
    lsb-release \
    software-properties-common \
    git \
    build-essential \
    pkg-config \
    libseccomp-dev \
    wget

# Install the Go toolchain (used to compile runc).
# -O overwrites an earlier download instead of saving a ".1" copy, and any
# previous /usr/local/go tree is removed so two Go versions are not mixed.
wget -O "$GO_TARBALL" "https://golang.org/dl/$GO_TARBALL"
sudo rm -rf /usr/local/go
sudo tar -C /usr/local -xzf "$GO_TARBALL"

# Persist the environment for future interactive shells
add_to_bashrc 'export PATH=$PATH:/usr/local/go/bin'
add_to_bashrc 'export GOPATH=$HOME/go'

# Apply the same settings to this shell directly. Sourcing ~/.bashrc from a
# script is unreliable: distribution default bashrc files commonly return
# early when the shell is not interactive.
export PATH="$PATH:/usr/local/go/bin"
export GOPATH="$HOME/go"

# Verify the toolchain
echo "=== 验证依赖安装 ==="
go version
gcc --version
make --version

echo "依赖安装完成"

3. runc原理与实战

3.1 runc架构解析

runc是一个符合OCI(Open Container Initiative)标准的容器运行时,它直接与Linux内核交互来创建和运行容器。

graph TB
    A[runc create] --> B[创建容器规范]
    B --> C[设置命名空间]
    C --> D[配置cgroups]
    D --> E[设置rootfs]
    E --> F[执行容器进程]
    F --> G[容器运行]
    
    H[Linux内核] --> I[命名空间]
    H --> J[cgroups]
    H --> K[文件系统]
    
    F --> I
    F --> J
    F --> K

3.2 从源码编译runc

文件名:build_runc.sh

#!/bin/bash
#
# build_runc.sh - clone runc, check out a pinned release, build it and
# install it system-wide.
# Requires: git, make, a Go toolchain on PATH, sudo rights.

# Stop at the first failure so that make / "sudo make install" can never run
# in the wrong directory after a failed cd or clone.
set -euo pipefail

readonly WORK_DIR="$HOME/container-runtime"
readonly RUNC_REPO="https://github.com/opencontainers/runc.git"
readonly RUNC_VERSION="v1.1.4"

echo "开始编译runc..."

# Working directory
mkdir -p "$WORK_DIR"
cd "$WORK_DIR"

# Clone only when the checkout does not exist yet, so the script can be re-run
if [ ! -d runc ]; then
    git clone "$RUNC_REPO"
fi
cd runc

# Pin to the release tag
git checkout "$RUNC_VERSION"

# Build
echo "编译runc..."
make

# Install into the system path
sudo make install

# Verify
echo "验证runc安装:"
runc --version

echo "runc编译安装完成"

3.3 创建基本容器

文件名:create_basic_container.sh

#!/bin/bash
#
# create_basic_container.sh - build a minimal busybox-based root filesystem
# under $HOME/basic-container/rootfs.
# Requires: sudo rights (for mknod); wget when busybox is not installed.

set -euo pipefail

readonly CONTAINER_DIR="$HOME/basic-container"
readonly ROOTFS="$CONTAINER_DIR/rootfs"
readonly BUSYBOX_URL="https://busybox.net/downloads/binaries/1.35.0-x86_64-linux-musl/busybox"

# Create a character device node inside the rootfs and optionally set its mode.
# An existing node is left alone so the script can be run more than once.
# Arguments: $1 - node name under dev/, $2 - major, $3 - minor,
#            $4 - optional permission mode
make_char_dev() {
    local node="$ROOTFS/dev/$1"
    if [ ! -e "$node" ]; then
        sudo mknod "$node" c "$2" "$3"
    fi
    if [ -n "${4:-}" ]; then
        sudo chmod "$4" "$node"
    fi
}

echo "创建基本容器演示..."

# Directory skeleton of the root filesystem
mkdir -p \
    "$ROOTFS/bin" "$ROOTFS/lib" "$ROOTFS/lib64" \
    "$ROOTFS/dev" "$ROOTFS/proc" "$ROOTFS/sys" \
    "$ROOTFS/etc" "$ROOTFS/home"

# Reuse the host's busybox when available, otherwise download a static build
if command -v busybox >/dev/null 2>&1; then
    cp -- "$(command -v busybox)" "$ROOTFS/bin/"
else
    wget -O "$ROOTFS/bin/busybox" "$BUSYBOX_URL"
    chmod +x "$ROOTFS/bin/busybox"
fi

# Install the applet links next to the binary; the subshell keeps the
# directory change from leaking into the rest of the script
(cd "$ROOTFS/bin" && ./busybox --install .)

# Device nodes (tty1 keeps its default mode, as before)
make_char_dev console 5 1 666
make_char_dev null 1 3 666
make_char_dev zero 1 5 666
make_char_dev ptmx 5 2 666
make_char_dev tty 5 0 666
make_char_dev tty1 4 1

echo "基本容器文件系统准备完成"

3.4 创建OCI运行时配置

文件名:create_oci_config.sh

#!/bin/bash
#
# create_oci_config.sh - write the OCI runtime configuration (config.json)
# for the bundle in $HOME/basic-container.
#
# The config is written directly instead of calling "runc spec" first: the
# generated file was overwritten immediately, and "runc spec" refuses to run
# when config.json already exists, which made re-runs print an error.
#
# The process is a long-running "sleep" with "terminal": false so the bundle
# can be used with the detached "runc create" / "runc start" flow; a config
# that requests a terminal cannot be created detached without a console
# socket. For an interactive shell via "runc run", switch "terminal" back to
# true and "args" back to ["sh"].

CONTAINER_DIR="$HOME/basic-container"
cd "$CONTAINER_DIR" || { echo "错误: 无法进入目录 $CONTAINER_DIR" >&2; exit 1; }

cat > config.json << 'EOF'
{
    "ociVersion": "1.0.2-dev",
    "process": {
        "terminal": false,
        "user": {
            "uid": 0,
            "gid": 0
        },
        "args": [
            "sleep",
            "3600"
        ],
        "env": [
            "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin",
            "TERM=xterm",
            "HOME=/root",
            "SHELL=/bin/sh"
        ],
        "cwd": "/",
        "capabilities": {
            "bounding": [
                "CAP_AUDIT_WRITE",
                "CAP_KILL",
                "CAP_NET_BIND_SERVICE"
            ],
            "effective": [
                "CAP_AUDIT_WRITE",
                "CAP_KILL",
                "CAP_NET_BIND_SERVICE"
            ],
            "inheritable": [
                "CAP_AUDIT_WRITE",
                "CAP_KILL",
                "CAP_NET_BIND_SERVICE"
            ],
            "permitted": [
                "CAP_AUDIT_WRITE",
                "CAP_KILL",
                "CAP_NET_BIND_SERVICE"
            ],
            "ambient": [
                "CAP_AUDIT_WRITE",
                "CAP_KILL",
                "CAP_NET_BIND_SERVICE"
            ]
        },
        "rlimits": [
            {
                "type": "RLIMIT_NOFILE",
                "hard": 1024,
                "soft": 1024
            }
        ],
        "noNewPrivileges": true
    },
    "root": {
        "path": "rootfs",
        "readonly": false
    },
    "hostname": "runc-container",
    "mounts": [
        {
            "destination": "/proc",
            "type": "proc",
            "source": "proc"
        },
        {
            "destination": "/dev",
            "type": "tmpfs",
            "source": "tmpfs",
            "options": [
                "nosuid",
                "strictatime",
                "mode=755",
                "size=65536k"
            ]
        },
        {
            "destination": "/dev/pts",
            "type": "devpts",
            "source": "devpts",
            "options": [
                "nosuid",
                "noexec",
                "newinstance",
                "ptmxmode=0666",
                "mode=0620",
                "gid=5"
            ]
        },
        {
            "destination": "/dev/shm",
            "type": "tmpfs",
            "source": "tmpfs",
            "options": [
                "nosuid",
                "noexec",
                "nodev",
                "mode=1777",
                "size=65536k"
            ]
        },
        {
            "destination": "/dev/mqueue",
            "type": "mqueue",
            "source": "mqueue",
            "options": [
                "nosuid",
                "noexec",
                "nodev"
            ]
        },
        {
            "destination": "/sys",
            "type": "sysfs",
            "source": "sysfs",
            "options": [
                "nosuid",
                "noexec",
                "nodev",
                "ro"
            ]
        }
    ],
    "hooks": {},
    "linux": {
        "resources": {
            "devices": [
                {
                    "allow": false,
                    "access": "rwm"
                }
            ]
        },
        "namespaces": [
            {
                "type": "pid"
            },
            {
                "type": "network"
            },
            {
                "type": "ipc"
            },
            {
                "type": "uts"
            },
            {
                "type": "mount"
            }
        ],
        "maskedPaths": [
            "/proc/kcore",
            "/proc/latency_stats",
            "/proc/timer_list",
            "/proc/timer_stats",
            "/proc/sched_debug",
            "/sys/firmware"
        ],
        "readonlyPaths": [
            "/proc/asound",
            "/proc/bus",
            "/proc/fs",
            "/proc/irq",
            "/proc/sys",
            "/proc/sysrq-trigger"
        ]
    }
}
EOF

echo "OCI运行时配置创建完成"

3.5 运行和管理容器

文件名:run_container.sh

#!/bin/bash
#
# run_container.sh - walk one container through the runc lifecycle:
# create, start, exec, pause, resume, kill, delete.
# Requires: the bundle in $HOME/basic-container (rootfs + config.json) and
# sudo rights.

CONTAINER_DIR="$HOME/basic-container"
CONTAINER_NAME="demo-container"

# runc is invoked through sudo, matching the other scripts in this tutorial.
# runc keeps separate state for root and for ordinary users, so a container
# created without sudo would be invisible to "sudo runc list".
runc_cmd=(sudo runc)

cd "$CONTAINER_DIR" || { echo "错误: 无法进入目录 $CONTAINER_DIR" >&2; exit 1; }

echo "=== 运行容器演示 ==="

# 1. Create the container; nothing below makes sense if this fails
echo "1. 创建容器..."
if ! "${runc_cmd[@]}" create --bundle . "$CONTAINER_NAME"; then
    echo "错误: 容器创建失败" >&2
    exit 1
fi

# 2. Show state after create
echo "2. 容器状态:"
"${runc_cmd[@]}" list

# 3. Start the container. "runc start" is run in the foreground so that a
# failure is visible instead of being lost in a background job.
echo "3. 启动容器..."
"${runc_cmd[@]}" start "$CONTAINER_NAME"

# Give the container process a moment to come up
sleep 2

# 4. Show state after start
echo "4. 容器运行状态:"
"${runc_cmd[@]}" list

# 5. Run commands inside the container
echo "5. 在容器内执行命令:"
"${runc_cmd[@]}" exec "$CONTAINER_NAME" /bin/sh -c "echo 'Hello from container!'"
"${runc_cmd[@]}" exec "$CONTAINER_NAME" /bin/sh -c "ps aux"

# 6. Pause
echo "6. 暂停容器..."
"${runc_cmd[@]}" pause "$CONTAINER_NAME"
echo "容器已暂停"
"${runc_cmd[@]}" list

# 7. Resume
echo "7. 恢复容器..."
"${runc_cmd[@]}" resume "$CONTAINER_NAME"
echo "容器已恢复"
"${runc_cmd[@]}" list

# 8. Stop
echo "8. 停止容器..."
"${runc_cmd[@]}" kill "$CONTAINER_NAME" KILL

# 9. Delete. The kill above is asynchronous, so the container may still be
# running at this point; --force makes the delete succeed either way.
echo "9. 删除容器..."
"${runc_cmd[@]}" delete --force "$CONTAINER_NAME"

echo "=== 容器生命周期演示完成 ==="

4. containerd原理与部署

4.1 containerd架构深度解析

containerd是一个工业级的容器运行时,它提供了完整的容器生命周期管理能力。

graph TB
    A[客户端工具] --> B[containerd API]
    
    subgraph C [containerd核心组件]
        B --> D[Runtime Service]
        B --> E[Image Service]
        B --> F[Content Service]
        B --> G[Snapshot Service]
        B --> H[Metadata Store]
    end
    
    D --> I[Task Service]
    I --> J[Shim]
    J --> K[runc]
    K --> L[容器进程]
    
    M[存储后端] --> F
    N[镜像仓库] --> E
    
    style C fill:#2d5c7a
    style K fill:#4a7c59

4.2 安装和配置containerd

文件名:install_containerd.sh

#!/bin/bash
#
# install_containerd.sh - install the containerd release binaries, runc (when
# missing) and the CNI plugins, then register and start containerd as a
# systemd service.
# Requires: wget, sudo rights, systemd, network access.

set -euo pipefail

readonly WORK_DIR="$HOME/container-runtime"
readonly CONTAINERD_TARBALL="containerd-1.6.8-linux-amd64.tar.gz"
readonly CNI_TARBALL="cni-plugins-linux-amd64-v1.1.1.tgz"

echo "开始安装containerd..."

# Download containerd. The working directory is created here so the script
# does not depend on an earlier step having made it; -O overwrites a previous
# download instead of saving a ".1" copy.
mkdir -p "$WORK_DIR"
cd "$WORK_DIR"
wget -O "$CONTAINERD_TARBALL" \
    "https://github.com/containerd/containerd/releases/download/v1.6.8/$CONTAINERD_TARBALL"

# Unpack into the system prefix
sudo tar Cxzvf /usr/local "$CONTAINERD_TARBALL"

# Download runc only when it is not installed yet, so a runc built from
# source earlier is not silently replaced
if ! command -v runc >/dev/null 2>&1; then
    wget -O runc.amd64 \
        https://github.com/opencontainers/runc/releases/download/v1.1.4/runc.amd64
    sudo install -m 755 runc.amd64 /usr/local/sbin/runc
fi

# CNI plugins; /opt is root-owned, so the directory is created with sudo
sudo mkdir -p /opt/cni/bin
wget -O "$CNI_TARBALL" \
    "https://github.com/containernetworking/plugins/releases/download/v1.1.1/$CNI_TARBALL"
sudo tar Cxzvf /opt/cni/bin "$CNI_TARBALL"

# Default containerd configuration
sudo mkdir -p /etc/containerd
containerd config default | sudo tee /etc/containerd/config.toml

# Switch the runc runtime to the systemd cgroup driver
sudo sed -i 's/SystemdCgroup = false/SystemdCgroup = true/' /etc/containerd/config.toml

# systemd unit. It is written through "sudo tee" because with
# "sudo cat > file" the redirection is opened by the calling, unprivileged
# shell and fails with "Permission denied".
sudo tee /etc/systemd/system/containerd.service > /dev/null << 'EOF'
[Unit]
Description=containerd container runtime
Documentation=https://containerd.io
After=network.target local-fs.target

[Service]
ExecStartPre=-/sbin/modprobe overlay
ExecStart=/usr/local/bin/containerd
Type=notify
Delegate=yes
KillMode=process
Restart=always
RestartSec=5
LimitNPROC=infinity
LimitCORE=infinity
LimitNOFILE=infinity
TasksMax=infinity
OOMScoreAdjust=-999

[Install]
WantedBy=multi-user.target
EOF

# Start the service
sudo systemctl daemon-reload
sudo systemctl enable containerd
sudo systemctl start containerd

# Verify; --no-pager keeps the status output from blocking on a pager
echo "验证containerd安装:"
sudo systemctl status --no-pager containerd
containerd --version

echo "containerd安装完成"

4.3 配置containerd客户端工具

文件名:setup_ctr_tool.sh

#!/bin/bash
#
# setup_ctr_tool.sh - install nerdctl next to the bundled ctr client and
# create the "demo" containerd namespace.
# Requires: wget, sudo rights, a running containerd.

set -euo pipefail

readonly WORK_DIR="$HOME/container-runtime"
readonly NERDCTL_TARBALL="nerdctl-0.23.0-linux-amd64.tar.gz"
readonly DEMO_NAMESPACE="demo"

echo "设置containerd客户端工具..."

# ctr ships with containerd; nerdctl is an additional, friendlier client.

# Install nerdctl
mkdir -p "$WORK_DIR"
cd "$WORK_DIR"
wget -O "$NERDCTL_TARBALL" \
    "https://github.com/containerd/nerdctl/releases/download/v0.23.0/$NERDCTL_TARBALL"
sudo tar Cxzvf /usr/local/bin "$NERDCTL_TARBALL"

# Verify. ctr is run through sudo like every other ctr call in this
# tutorial, so that it can reach the daemon socket.
echo "验证客户端工具:"
sudo ctr version
nerdctl --version

# Create the namespace only when it is missing, so a re-run does not fail.
# The list is captured first instead of piped into grep -q, which could end
# the pipeline early and trip pipefail.
existing_namespaces=$(sudo ctr namespace list -q)
if ! grep -qxF -- "$DEMO_NAMESPACE" <<< "$existing_namespaces"; then
    sudo ctr namespace create "$DEMO_NAMESPACE"
fi

echo "客户端工具配置完成"

4.4 使用containerd管理容器

文件名:containerd_operations.sh

#!/bin/bash
#
# containerd_operations.sh - demonstrate image, container and task handling
# with ctr inside a dedicated containerd namespace.
# Requires: a running containerd, sudo rights, network access for the pull.

echo "=== containerd容器操作演示 ==="

# Namespace, image and container used throughout the demo
NAMESPACE="demo"
IMAGE="docker.io/library/busybox:latest"
CONTAINER="busybox-demo"

# Every call goes through the same sudo + namespace prefix
ctr_cmd=(sudo ctr -n "$NAMESPACE")

# 1. Pull the image
echo "1. 拉取busybox镜像..."
"${ctr_cmd[@]}" images pull "$IMAGE"

# 2. List images
echo "2. 查看本地镜像:"
"${ctr_cmd[@]}" images list

# 3. Create the container.
# An explicit long-running command is given: the image's default command is
# a shell, which presumably exits at once when started detached without
# input, leaving nothing for the exec/pause steps below to act on.
echo "3. 创建容器..."
"${ctr_cmd[@]}" containers create \
    --env "HOME=/" \
    --env "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin" \
    "$IMAGE" \
    "$CONTAINER" \
    sleep 3600

# 4. List containers
echo "4. 查看容器:"
"${ctr_cmd[@]}" containers list

# 5. Start the task in the background
echo "5. 启动容器任务..."
"${ctr_cmd[@]}" task start -d "$CONTAINER"

# 6. List tasks
echo "6. 查看运行中的任务:"
"${ctr_cmd[@]}" task list

# 7. Run commands inside the container
echo "7. 在容器内执行命令:"
"${ctr_cmd[@]}" task exec --exec-id demo-1 "$CONTAINER" \
    /bin/sh -c "echo 'Hello from containerd container!'"

"${ctr_cmd[@]}" task exec --exec-id demo-2 "$CONTAINER" \
    /bin/sh -c "ps aux"

# 8. Pause the task
echo "8. 暂停任务..."
"${ctr_cmd[@]}" task pause "$CONTAINER"
echo "任务已暂停"

# 9. Resume the task
echo "9. 恢复任务..."
"${ctr_cmd[@]}" task resume "$CONTAINER"
echo "任务已恢复"

# 10. Stop the task. SIGKILL is sent explicitly: the process is PID 1 of its
# PID namespace, and such a process ignores SIGTERM unless it installs a
# handler for it.
echo "10. 停止任务..."
"${ctr_cmd[@]}" task kill --signal SIGKILL "$CONTAINER"

# 11. Remove task and container; --force covers a task that has not
# finished exiting yet
echo "11. 清理资源..."
"${ctr_cmd[@]}" task rm --force "$CONTAINER"
"${ctr_cmd[@]}" containers delete "$CONTAINER"

echo "=== containerd操作演示完成 ==="

5. 容器网络配置

5.1 配置CNI网络插件

文件名:setup_cni_network.sh

#!/bin/bash
#
# setup_cni_network.sh - write the CNI network configurations (a bridge
# network and the loopback interface) into /etc/cni/net.d.
# Requires: sudo rights.

echo "配置CNI网络..."

# CNI configuration directory
sudo mkdir -p /etc/cni/net.d

# Bridge network.
# The file is written through "sudo tee": with "sudo cat > file" the
# redirection is opened by the calling, unprivileged shell and fails with
# "Permission denied" on this root-owned directory.
sudo tee /etc/cni/net.d/10-mynet.conf > /dev/null << 'EOF'
{
  "cniVersion": "1.0.0",
  "name": "mynet",
  "type": "bridge",
  "bridge": "cni0",
  "isGateway": true,
  "ipMasq": true,
  "ipam": {
    "type": "host-local",
    "subnet": "10.22.0.0/16",
    "routes": [
      { "dst": "0.0.0.0/0" }
    ]
  }
}
EOF

# Loopback interface (this is the loopback plugin, not a port mapping)
sudo tee /etc/cni/net.d/99-loopback.conf > /dev/null << 'EOF'
{
  "cniVersion": "1.0.0",
  "name": "lo",
  "type": "loopback"
}
EOF

echo "CNI网络配置完成"

5.2 创建带网络的容器

文件名:create_network_container.sh

#!/bin/bash
#
# create_network_container.sh - prepare a second bundle,
# $HOME/network-container, by copying the busybox rootfs of the basic
# container and writing a config.json for it.
# Requires: the rootfs in $HOME/basic-container/rootfs, sudo rights.

echo "创建带网络的容器..."

# Bundle directory
CONTAINER_NET_DIR="$HOME/network-container"
mkdir -p "$CONTAINER_NET_DIR/rootfs"

# Reuse the busybox rootfs built earlier.
# The source tree contains device nodes made with mknod; recreating those
# needs root, so the copy runs through sudo, and -a preserves node types,
# modes and links. The trailing "/." also picks up hidden files.
sudo cp -a "$HOME/basic-container/rootfs/." "$CONTAINER_NET_DIR/rootfs/"

cd "$CONTAINER_NET_DIR" || { echo "错误: 无法进入目录 $CONTAINER_NET_DIR" >&2; exit 1; }

# Write the OCI configuration that carries the network setup.
# NOTE(review): the "hooks" section in the config below uses a "start" key
# whose "path" is the /opt/cni/bin directory. The OCI runtime spec defines
# prestart / createRuntime / createContainer / startContainer / poststart /
# poststop hooks and expects "path" to be an executable, so this hook is
# presumably never run - confirm, and attach the CNI network explicitly.
cat > config.json << 'EOF'
{
    "ociVersion": "1.0.2-dev",
    "process": {
        "terminal": true,
        "user": {
            "uid": 0,
            "gid": 0
        },
        "args": [
            "sh"
        ],
        "env": [
            "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin",
            "TERM=xterm",
            "HOME=/root"
        ],
        "cwd": "/",
        "capabilities": {
            "bounding": [
                "CAP_AUDIT_WRITE",
                "CAP_KILL",
                "CAP_NET_BIND_SERVICE",
                "CAP_NET_RAW"
            ],
            "effective": [
                "CAP_AUDIT_WRITE",
                "CAP_KILL",
                "CAP_NET_BIND_SERVICE",
                "CAP_NET_RAW"
            ],
            "inheritable": [
                "CAP_AUDIT_WRITE",
                "CAP_KILL",
                "CAP_NET_BIND_SERVICE",
                "CAP_NET_RAW"
            ],
            "permitted": [
                "CAP_AUDIT_WRITE",
                "CAP_KILL",
                "CAP_NET_BIND_SERVICE",
                "CAP_NET_RAW"
            ],
            "ambient": [
                "CAP_AUDIT_WRITE",
                "CAP_KILL",
                "CAP_NET_BIND_SERVICE",
                "CAP_NET_RAW"
            ]
        },
        "rlimits": [
            {
                "type": "RLIMIT_NOFILE",
                "hard": 1024,
                "soft": 1024
            }
        ]
    },
    "root": {
        "path": "rootfs",
        "readonly": false
    },
    "hostname": "net-container",
    "mounts": [
        {
            "destination": "/proc",
            "type": "proc",
            "source": "proc"
        },
        {
            "destination": "/dev",
            "type": "tmpfs",
            "source": "tmpfs",
            "options": [
                "nosuid",
                "strictatime",
                "mode=755",
                "size=65536k"
            ]
        },
        {
            "destination": "/dev/pts",
            "type": "devpts",
            "source": "devpts",
            "options": [
                "nosuid",
                "noexec",
                "newinstance",
                "ptmxmode=0666",
                "mode=0620",
                "gid=5"
            ]
        },
        {
            "destination": "/dev/shm",
            "type": "tmpfs",
            "source": "tmpfs",
            "options": [
                "nosuid",
                "noexec",
                "nodev",
                "mode=1777",
                "size=65536k"
            ]
        },
        {
            "destination": "/sys",
            "type": "sysfs",
            "source": "sysfs",
            "options": [
                "nosuid",
                "noexec",
                "nodev",
                "ro"
            ]
        }
    ],
    "hooks": {
        "start": [
            {
                "path": "/opt/cni/bin",
                "args": ["CNI_COMMAND=ADD", "CNI_CONTAINERID=net-container", "CNI_NETNS=/var/run/netns/net-container", "CNI_IFNAME=eth0", "CNI_PATH=/opt/cni/bin"],
                "env": [
                    "CNI_COMMAND=ADD",
                    "CNI_CONTAINERID=net-container",
                    "CNI_NETNS=/var/run/netns/net-container",
                    "CNI_IFNAME=eth0",
                    "CNI_PATH=/opt/cni/bin"
                ]
            }
        ]
    },
    "linux": {
        "resources": {
            "devices": [
                {
                    "allow": false,
                    "access": "rwm"
                }
            ]
        },
        "namespaces": [
            {
                "type": "pid"
            },
            {
                "type": "network"
            },
            {
                "type": "ipc"
            },
            {
                "type": "uts"
            },
            {
                "type": "mount"
            }
        ],
        "maskedPaths": [
            "/proc/kcore",
            "/proc/latency_stats",
            "/proc/timer_list",
            "/proc/timer_stats",
            "/proc/sched_debug",
            "/sys/firmware"
        ],
        "readonlyPaths": [
            "/proc/asound",
            "/proc/bus",
            "/proc/fs",
            "/proc/irq",
            "/proc/sys",
            "/proc/sysrq-trigger"
        ]
    }
}
EOF

echo "带网络配置的容器准备完成"

6. 容器存储管理

6.1 理解容器存储原理

容器存储使用分层文件系统,通常使用overlayfs作为存储驱动。

graph TB
    A[容器读写层] --> B[overlayfs]
    C[容器层] --> B
    D[镜像层] --> B
    E[基础镜像] --> D
    
    B --> F[联合挂载点]
    
    style A fill:#e8f4fd
    style C fill:#cfe8fc
    style D fill:#a9d4f5
    style E fill:#7fbdea

6.2 配置overlayfs存储

文件名:setup_overlay_storage.sh

#!/bin/bash
#
# setup_overlay_storage.sh - demonstrate an overlayfs mount: build a small
# base layer, mount it with an empty writable layer, write a file through the
# merged view, then unmount again.
# Requires: sudo rights, overlay support in the kernel.

echo "配置overlayfs存储..."

# Show whether the kernel already lists overlay
echo "检查overlayfs内核支持:"
grep overlay /proc/filesystems

# Load the overlay module
sudo modprobe overlay

# Directory layout
STORAGE_DIR="$HOME/container-storage"
BASE_ROOTFS="$STORAGE_DIR/images/base/rootfs"
OVERLAY_DIR="$STORAGE_DIR/overlay"
MERGED_DIR="$OVERLAY_DIR/merged"

mkdir -p "$OVERLAY_DIR" "$STORAGE_DIR/snapshots"

# Base image layer
mkdir -p "$BASE_ROOTFS"
cd "$BASE_ROOTFS" || { echo "错误: 无法进入目录 $BASE_ROOTFS" >&2; exit 1; }

# Skeleton of the base filesystem
mkdir -p bin etc home lib lib64 proc sys tmp usr var
cat > etc/passwd << 'EOF'
root:x:0:0:root:/root:/bin/sh
EOF

cat > etc/group << 'EOF'
root:x:0:
EOF

# Overlay mount demonstration
echo "演示overlayfs挂载..."

# lowerdir (read-only base), upperdir (writable layer), workdir (overlayfs
# scratch space) and merged (the mount point)
mkdir -p "$OVERLAY_DIR/lower" "$OVERLAY_DIR/upper" "$OVERLAY_DIR/work" "$MERGED_DIR"
cp -r "$BASE_ROOTFS"/* "$OVERLAY_DIR/lower/"

# Mount; stop here on failure, otherwise the test file below would be
# written into the bare mount-point directory
if ! sudo mount -t overlay overlay \
    -o "lowerdir=$OVERLAY_DIR/lower,upperdir=$OVERLAY_DIR/upper,workdir=$OVERLAY_DIR/work" \
    "$MERGED_DIR"; then
    echo "错误: overlayfs挂载失败" >&2
    exit 1
fi

echo "overlayfs挂载完成"
echo "挂载点: $MERGED_DIR"

# Write a test file through the merged view.
# "sudo tee" is used because with "sudo echo ... > file" the redirection is
# performed by the calling, unprivileged shell, not by root. tee also
# creates the file, so no separate touch is needed.
echo "Hello OverlayFS" | sudo tee "$MERGED_DIR/test-file.txt" > /dev/null

echo "测试文件已创建"
ls -la "$MERGED_DIR/"

# Unmount
sudo umount "$MERGED_DIR"

echo "overlayfs存储配置演示完成"

7. 安全特性配置

7.1 配置Linux安全模块

文件名:configure_security.sh

#!/bin/bash
#
# configure_security.sh - report AppArmor and seccomp availability, then
# write a seccomp allow-list profile to $HOME/container-security.

printf '%s\n' "配置容器安全特性..."

# 1. AppArmor: show its status when the tool is present
printf '%s\n' "1. 检查AppArmor:"
if ! command -v apparmor_status >/dev/null 2>&1; then
    printf '%s\n' "AppArmor未安装"
else
    printf '%s\n' "AppArmor已安装"
    sudo apparmor_status
fi

# 2. Seccomp: look for a Seccomp field in the status file of PID 1;
# nothing is printed when that file does not exist
printf '%s\n' "2. 检查Seccomp:"
init_status=/proc/1/status
if [ ! -e "$init_status" ]; then
    :
elif grep -q Seccomp "$init_status"; then
    printf '%s\n' "Seccomp支持已启用"
else
    printf '%s\n' "Seccomp不支持"
fi

# 3. Seccomp profile: create the target directory for the file written below.
# NOTE(review): the allow-list that follows has no entry for arch_prctl or
# clone; with defaultAction SCMP_ACT_ERRNO that presumably breaks process
# start-up on x86_64 - verify against a known-good default profile.
mkdir -p $HOME/container-security
cat > $HOME/container-security/seccomp-profile.json << 'EOF'
{
    "defaultAction": "SCMP_ACT_ERRNO",
    "architectures": [
        "SCMP_ARCH_X86_64",
        "SCMP_ARCH_X86",
        "SCMP_ARCH_X32"
    ],
    "syscalls": [
        {
            "names": [
                "accept",
                "accept4",
                "access",
                "alarm",
                "bind",
                "brk",
                "capget",
                "capset",
                "chdir",
                "chmod",
                "chown",
                "chown32",
                "clock_getres",
                "clock_gettime",
                "clock_nanosleep",
                "close",
                "connect",
                "copy_file_range",
                "dup",
                "dup2",
                "dup3",
                "epoll_create",
                "epoll_create1",
                "epoll_ctl",
                "epoll_pwait",
                "epoll_wait",
                "execve",
                "execveat",
                "exit",
                "exit_group",
                "faccessat",
                "faccessat2",
                "fadvise64",
                "fadvise64_64",
                "fallocate",
                "fanotify_mark",
                "fchdir",
                "fchmod",
                "fchmodat",
                "fchown",
                "fchown32",
                "fchownat",
                "fcntl",
                "fcntl64",
                "fdatasync",
                "fgetxattr",
                "flistxattr",
                "flock",
                "fork",
                "fremovexattr",
                "fsetxattr",
                "fstat",
                "fstat64",
                "fstatat64",
                "fstatfs",
                "fstatfs64",
                "fsync",
                "ftruncate",
                "ftruncate64",
                "futex",
                "futimesat",
                "getcpu",
                "getcwd",
                "getdents",
                "getdents64",
                "getegid",
                "getegid32",
                "geteuid",
                "geteuid32",
                "getgid",
                "getgid32",
                "getgroups",
                "getgroups32",
                "getitimer",
                "getpeername",
                "getpgid",
                "getpgrp",
                "getpid",
                "getppid",
                "getpriority",
                "getrandom",
                "getresgid",
                "getresgid32",
                "getresuid",
                "getresuid32",
                "getrlimit",
                "get_robust_list",
                "getrusage",
                "getsid",
                "getsockname",
                "getsockopt",
                "get_thread_area",
                "gettid",
                "gettimeofday",
                "getuid",
                "getuid32",
                "getxattr",
                "inotify_add_watch",
                "inotify_init",
                "inotify_init1",
                "inotify_rm_watch",
                "io_cancel",
                "io_destroy",
                "io_getevents",
                "io_setup",
                "io_submit",
                "ioctl",
                "kill",
                "lchown",
                "lchown32",
                "lgetxattr",
                "link",
                "linkat",
                "listen",
                "listxattr",
                "llistxattr",
                "lremovexattr",
                "lseek",
                "lsetxattr",
                "lstat",
                "lstat64",
                "madvise",
                "memfd_create",
                "mincore",
                "mkdir",
                "mkdirat",
                "mknod",
                "mknodat",
                "mlock",
                "mlock2",
                "mlockall",
                "mmap",
                "mmap2",
                "mprotect",
                "mq_getsetattr",
                "mq_notify",
                "mq_open",
                "mq_timedreceive",
                "mq_timedsend",
                "mq_unlink",
                "mremap",
                "msgctl",
                "msgget",
                "msgrcv",
                "msgsnd",
                "msync",
                "munlock",
                "munlockall",
                "munmap",
                "nanosleep",
                "newfstatat",
                "_newselect",
                "open",
                "openat",
                "pause",
                "pipe",
                "pipe2",
                "poll",
                "ppoll",
                "prctl",
                "pread64",
                "preadv",
                "preadv2",
                "prlimit64",
                "pselect6",
                "pwrite64",
                "pwritev",
                "pwritev2",
                "read",
                "readahead",
                "readlink",
                "readlinkat",
                "readv",
                "recv",
                "recvfrom",
                "recvmmsg",
                "recvmsg",
                "remap_file_pages",
                "removexattr",
                "rename",
                "renameat",
                "renameat2",
                "restart_syscall",
                "rmdir",
                "rt_sigaction",
                "rt_sigpending",
                "rt_sigprocmask",
                "rt_sigqueueinfo",
                "rt_sigreturn",
                "rt_sigsuspend",
                "rt_sigtimedwait",
                "rt_tgsigqueueinfo",
                "sched_getaffinity",
                "sched_getattr",
                "sched_getparam",
                "sched_get_priority_max",
                "sched_get_priority_min",
                "sched_getscheduler",
                "sched_rr_get_interval",
                "sched_setaffinity",
                "sched_setattr",
                "sched_setparam",
                "sched_setscheduler",
                "sched_yield",
                "seccomp",
                "select",
                "semctl",
                "semget",
                "semop",
                "semtimedop",
                "send",
                "sendmmsg",
                "sendmsg",
                "sendto",
                "setfsgid",
                "setfsgid32",
                "setfsuid",
                "setfsuid32",
                "setgid",
                "setgid32",
                "setgroups",
                "setgroups32",
                "setitimer",
                "setpgid",
                "setpriority",
                "setregid",
                "setregid32",
                "setresgid",
                "setresgid32",
                "setresuid",
                "setresuid32",
                "setreuid",
                "setreuid32",
                "setrlimit",
                "set_robust_list",
                "setsid",
                "setsockopt",
                "set_thread_area",
                "set_tid_address",
                "setuid",
                "setuid32",
                "setxattr",
                "shmat",
                "shmctl",
                "shmdt",
                "shmget",
                "shutdown",
                "sigaction",
                "sigaltstack",
                "signal",
                "signalfd",
                "signalfd4",
                "sigprocmask",
                "sigreturn",
                "socket",
                "socketcall",
                "socketpair",
                "splice",
                "stat",
                "stat64",
                "statfs",
                "statfs64",
                "statx",
                "symlink",
                "symlinkat",
                "sync",
                "sync_file_range",
                "syncfs",
                "sysinfo",
                "tee",
                "tgkill",
                "time",
                "timer_create",
                "timer_delete",
                "timer_getoverrun",
                "timer_gettime",
                "timer_settime",
                "timerfd_create",
                "timerfd_gettime",
                "timerfd_settime",
                "times",
                "tkill",
                "truncate",
                "truncate64",
                "ugetrlimit",
                "umask",
                "uname",
                "unlink",
                "unlinkat",
                "utime",
                "utimensat",
                "utimes",
                "vfork",
                "vmsplice",
                "wait4",
                "waitid",
                "waitpid",
                "write",
                "writev"
            ],
            "action": "SCMP_ACT_ALLOW"
        }
    ]
}
EOF

echo "安全配置完成"

8. 监控和调试

8.1 容器监控工具

文件名:container_monitoring.sh

#!/bin/bash
#
# container_monitoring.sh - install a few monitoring tools and generate two
# helper scripts in $HOME: container-monitor.sh and container-debug.sh.
# Requires: sudo rights, an apt-based system.

echo "容器监控和调试工具..."

# 1. Monitoring tools
sudo apt-get install -y htop iotop nethogs

# 2. Generate the monitoring script. The heredoc delimiter is quoted, so the
# text below is written out literally, without expansion.
cat > "$HOME/container-monitor.sh" << 'EOF'
#!/bin/bash

echo "=== 容器监控信息 ==="
echo ""

# runc containers
echo "1. runc容器状态:"
if command -v runc >/dev/null 2>&1; then
    sudo runc list
else
    echo "runc未安装"
fi
echo ""

# containerd containers
echo "2. containerd容器状态:"
if command -v ctr >/dev/null 2>&1; then
    sudo ctr containers list
else
    echo "ctr未安装"
fi
echo ""

# System resource usage
echo "3. 系统资源使用:"
echo "内存使用:"
free -h
echo ""

echo "磁盘使用:"
df -h
echo ""

echo "CPU使用:"
top -bn1 | head -20
echo ""

# Interface statistics. netstat comes from net-tools, which this tutorial
# never installs, so fall back to iproute2 when it is missing.
echo "4. 网络统计:"
if command -v netstat >/dev/null 2>&1; then
    netstat -i
else
    ip -s link
fi
echo ""

# Container-related processes
echo "5. 容器相关进程:"
ps aux | grep -E "(runc|containerd|ctr)" | grep -v grep
echo ""

echo "=== 监控完成 ==="
EOF

chmod +x "$HOME/container-monitor.sh"

# 3. Generate the debugging script. The heredoc delimiter is quoted, so the
# text below is written out literally, without expansion.
cat > "$HOME/container-debug.sh" << 'EOF'
#!/bin/bash
#
# Usage: container-debug.sh <container-id>
# Prints state, cgroup figures, namespaces and journal entries for one
# runc container.

CONTAINER_ID=${1:-}

if [ -z "$CONTAINER_ID" ]; then
    echo "用法: $0 <容器ID>"
    exit 1
fi

echo "调试容器: $CONTAINER_ID"
echo ""

# Container state; the id is matched as a fixed string, not as a regex
echo "1. 容器状态:"
sudo runc list | grep -F -- "$CONTAINER_ID"
echo ""

# cgroup: first directory whose name contains the container id
echo "2. cgroup信息:"
CGROUP_PATH=$(find /sys/fs/cgroup -name "*$CONTAINER_ID*" -type d 2>/dev/null | head -1)
if [ -n "$CGROUP_PATH" ]; then
    echo "cgroup路径: $CGROUP_PATH"
    echo "内存使用:"
    cat "$CGROUP_PATH/memory.current" 2>/dev/null || echo "无法读取内存信息"
    echo "CPU使用:"
    cat "$CGROUP_PATH/cpu.stat" 2>/dev/null || echo "无法读取CPU信息"
else
    echo "未找到cgroup信息"
fi
echo ""

# Namespaces of the last process reported by "runc ps"
echo "3. 命名空间信息:"
CONTAINER_PID=$(sudo runc ps "$CONTAINER_ID" 2>/dev/null | tail -1 | awk '{print $2}')
sudo ls -la "/proc/$CONTAINER_PID/ns/" 2>/dev/null || echo "无法获取命名空间信息"
echo ""

# Journal entries tagged with the container id
echo "4. 容器日志:"
sudo journalctl --no-pager -t "$CONTAINER_ID" 2>/dev/null || echo "无日志信息"
echo ""

echo "调试完成"
EOF

chmod +x "$HOME/container-debug.sh"

echo "监控工具安装完成"

9. 实战项目:构建完整的容器运行时环境

9.1 自动化部署脚本

文件名:deploy_container_runtime.sh

#!/bin/bash
#
# deploy_container_runtime.sh - install runc, containerd and the CNI plugins,
# then run a short smoke test.
# Requires: an apt-based system, sudo rights, network access.

# Abort on the first failing command
set -e

echo "开始部署完整的容器运行时环境..."
echo "=========================================="

# ANSI colour escapes used by the log helpers
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m' # No Color

# Log helpers.
# Arguments: $1 - message text
# Only the colour codes go through %b; the message is printed with %s so
# that backslashes inside it are not interpreted as escape sequences.

# Informational message, written to stdout.
log_info() {
    printf '%b[INFO]%b %s\n' "$GREEN" "$NC" "$1"
}

# Warning, written to stderr.
log_warn() {
    printf '%b[WARN]%b %s\n' "$YELLOW" "$NC" "$1" >&2
}

# Error, written to stderr.
log_error() {
    printf '%b[ERROR]%b %s\n' "$RED" "$NC" "$1" >&2
}

# Install the system packages needed to build runc.
# Uses: log_info; runs apt-get through sudo.
install_dependencies() {
    local -a packages=(
        make
        gcc
        git
        pkg-config
        libseccomp-dev
        btrfs-progs
    )

    log_info "安装系统依赖..."
    sudo apt-get update
    sudo apt-get install -y "${packages[@]}"
}

# Build runc v1.1.4 from source in ~/container-runtime and install it.
# Uses: log_info, log_error; git, make, sudo.
# Exits the script with status 1 when the installed runc cannot be run.
install_runc() {
    log_info "编译安装runc..."

    cd ~/container-runtime
    # Clone only on the first run
    [ -d "runc" ] || git clone https://github.com/opencontainers/runc.git

    cd runc
    git checkout v1.1.4

    make
    sudo make install

    # Verify the installation
    if ! runc --version >/dev/null 2>&1; then
        log_error "runc安装失败"
        exit 1
    fi
    log_info "runc安装成功: $(runc --version)"
}

# Download containerd v1.6.8, unpack it into /usr/local, register it as a
# systemd service and start it.
# Uses: log_info, log_error; wget, sudo, systemctl.
# Exits the script with status 1 when the service is not active afterwards.
install_containerd() {
    local tarball="containerd-1.6.8-linux-amd64.tar.gz"

    log_info "安装containerd..."

    cd ~/container-runtime
    # -O overwrites an earlier download instead of saving a ".1" copy
    wget -q -O "$tarball" \
        "https://github.com/containerd/containerd/releases/download/v1.6.8/$tarball"
    sudo tar Cxzvf /usr/local "$tarball"

    # systemd unit. It is written through "sudo tee" because with
    # "sudo cat > file" the redirection is opened by the calling,
    # unprivileged shell and fails with "Permission denied".
    sudo tee /etc/systemd/system/containerd.service > /dev/null << 'EOF'
[Unit]
Description=containerd container runtime
Documentation=https://containerd.io
After=network.target local-fs.target

[Service]
ExecStartPre=-/sbin/modprobe overlay
ExecStart=/usr/local/bin/containerd
Type=notify
Delegate=yes
KillMode=process
Restart=always
RestartSec=5
LimitNPROC=infinity
LimitCORE=infinity
LimitNOFILE=infinity
TasksMax=infinity
OOMScoreAdjust=-999

[Install]
WantedBy=multi-user.target
EOF

    sudo systemctl daemon-reload
    sudo systemctl enable containerd
    sudo systemctl start containerd

    # Verify that the service is running
    if sudo systemctl is-active --quiet containerd; then
        log_info "containerd服务运行正常"
    else
        log_error "containerd服务启动失败"
        exit 1
    fi
}

# Install the CNI plugins into /opt/cni/bin and write the bridge network
# configuration to /etc/cni/net.d.
# Uses: log_info; wget, sudo.
setup_cni() {
    local tarball="cni-plugins-linux-amd64-v1.1.1.tgz"

    log_info "配置CNI网络..."

    # Download into the shared working directory rather than into whatever
    # directory the caller happens to be in
    cd ~/container-runtime

    sudo mkdir -p /opt/cni/bin
    wget -q -O "$tarball" \
        "https://github.com/containernetworking/plugins/releases/download/v1.1.1/$tarball"
    sudo tar Cxzvf /opt/cni/bin "$tarball"

    sudo mkdir -p /etc/cni/net.d
    # Written through "sudo tee": with "sudo cat > file" the redirection is
    # opened by the calling, unprivileged shell and fails with
    # "Permission denied".
    sudo tee /etc/cni/net.d/10-mynet.conf > /dev/null << 'EOF'
{
  "cniVersion": "1.0.0",
  "name": "mynet",
  "type": "bridge",
  "bridge": "cni0",
  "isGateway": true,
  "ipMasq": true,
  "ipam": {
    "type": "host-local",
    "subnet": "10.22.0.0/16",
    "routes": [
      { "dst": "0.0.0.0/0" }
    ]
  }
}
EOF
}

# Build a throw-away busybox bundle in $HOME/test-container for the smoke
# test: a rootfs with busybox, a few device nodes, and a config.json.
# Uses: log_info; wget, sudo, runc, sed.
# Globals: TEST_DIR (written).
create_test_container() {
    log_info "创建测试容器..."

    TEST_DIR="$HOME/test-container"
    mkdir -p "$TEST_DIR/rootfs"

    # rootfs skeleton; dev/ must exist before the mknod calls below
    cd "$TEST_DIR/rootfs"
    mkdir -p bin dev lib lib64 etc proc sys

    # Put busybox and its applet links into bin/, which is on the PATH of
    # the generated spec; the subshell keeps the cd local
    wget -q -O bin/busybox https://busybox.net/downloads/binaries/1.35.0-x86_64-linux-musl/busybox
    chmod +x bin/busybox
    (cd bin && ./busybox --install .)

    # Device nodes; existing ones are kept so the function can be re-run
    [ -e dev/console ] || sudo mknod dev/console c 5 1
    [ -e dev/null ] || sudo mknod dev/null c 1 3
    [ -e dev/zero ] || sudo mknod dev/zero c 1 5

    # Generate the OCI config. A stale config.json is removed first because
    # "runc spec" refuses to overwrite an existing file.
    cd "$TEST_DIR"
    rm -f config.json
    runc spec --rootless

    # The generated spec requests a terminal and runs "sh"; a detached
    # "runc create" cannot allocate a terminal without a console socket, so
    # switch to a long-running command without one.
    sed -i \
        -e 's/"terminal": true/"terminal": false/' \
        -e 's/^\([[:space:]]*\)"sh"$/\1"sleep", "3600"/' \
        config.json

    log_info "测试容器准备完成"
}

# Smoke-test both runtimes: run the test bundle through a short runc
# lifecycle, then pull an image and create/delete a container with ctr.
# Uses: log_info; sudo, runc, ctr.
run_integration_test() {
    log_info "运行集成测试..."

    # runc
    log_info "测试runc..."
    cd "$HOME/test-container"
    sudo runc create test-container
    # Run in the foreground so that a failed start aborts the script under
    # "set -e" instead of being lost in a background job
    sudo runc start test-container
    sleep 2
    sudo runc kill test-container KILL
    # The kill is asynchronous; --force lets the delete succeed even when
    # the container has not reached the stopped state yet
    sudo runc delete --force test-container

    # containerd
    log_info "测试containerd..."
    sudo ctr images pull docker.io/library/busybox:latest
    sudo ctr containers create docker.io/library/busybox:latest test-busybox
    sudo ctr containers delete test-busybox

    log_info "集成测试完成"
}

# Entry point: create the working directory, run every installation step in
# order, then print a summary of what was installed.
# Uses: log_info and the step functions named in the loop below.
main() {
    local step

    log_info "开始部署容器运行时环境"

    # Working directory shared by the steps
    mkdir -p "$HOME/container-runtime"

    # Installation steps, in dependency order
    for step in \
        install_dependencies \
        install_runc \
        install_containerd \
        setup_cni \
        create_test_container \
        run_integration_test; do
        "$step"
    done

    # Summary
    log_info "=========================================="
    log_info "容器运行时环境部署完成!"
    log_info "可用组件:"
    log_info "  - runc: $(runc --version | head -1)"
    log_info "  - containerd: $(containerd --version | head -1)"
    log_info "  - CNI插件: 已安装到 /opt/cni/bin"
    log_info ""
    log_info "测试容器位于: $HOME/test-container"
    log_info "监控脚本位于: $HOME/container-monitor.sh"
}

# Run the main function, forwarding the script's command-line arguments
main "$@"

10. 总结

通过本教程,我们深入探讨了Linux容器运行时(runc/containerd)的原理和实际操作。从基础的runc编译安装到完整的containerd环境部署,我们覆盖了容器运行时的各个方面:

  1. runc原理:理解了OCI标准容器运行时的底层实现
  2. containerd架构:掌握了工业级容器运行时的组件交互
  3. 网络配置:学习了CNI插件的工作原理和配置方法
  4. 存储管理:深入了解了overlayfs等存储驱动的使用
  5. 安全特性:配置了seccomp、capabilities等安全机制
  6. 监控调试:建立了完整的容器监控和调试体系

本教程提供了从零开始构建容器运行时环境的完整路径,每个步骤都配有详细的代码和解释,帮助读者在理解原理的同时动手实践。