昇腾910B初始化(x86_64 服务器)
所需安装包:
Ascend-HDK-23.0.T60.tar.gz
7.0.t57.tar.gz
Ascend-docker-runtime_5.0.RC2.2_linux-x86_64.run
hccn.conf.bak.tar.gz
# Driver install (a reboot is required for it to take effect).
# The installer runs in a subshell so the caller's working directory is left
# unchanged; the firmware step below can then be run from the same directory
# whether or not the host was rebooted in between.
tar -xf Ascend-HDK-23.0.T60.tar.gz && (cd Ascend-HDK-23.0.T60 && ./Ascend-hdk-910b-npu-driver_23.0.0.b050_linux-x86-64.run --full --install-for-all --quiet)
# Firmware install (a reboot is required for it to take effect).
(cd Ascend-HDK-23.0.T60 && ./Ascend-hdk-910b-npu-firmware_7.1.0.1.118.run --full --quiet)
# Verify the installation.
npu-smi info
# Install the two CANN packages: toolkit first, then the 910B kernels.
# python3-pip is a hard prerequisite.
apt install python3-pip -y
# All steps are chained: if the extract, the cd or the toolkit install fails,
# the kernels installer is not attempted from a wrong directory or on top of a
# missing toolkit.
tar -xf 7.0.t57.tar.gz && cd 7.0.t57/ && \
  ./Ascend-cann-toolkit_7.0.0_linux-x86_64.run --install --quiet && \
  ./Ascend-cann-kernels-910b_7.0.0_linux.run --install --quiet
# Check the installed version.
cat /usr/local/Ascend/ascend-toolkit/7.0.0/test-ops/version.info
# Install Ascend Docker Runtime.
# The steps are chained so a failed download never leads to running a missing
# or stale installer.
# NOTE(review): Docker is restarted afterwards so it picks up the newly
# installed runtime, as described in the Ascend Docker Runtime installation
# guide - confirm for this host; it is not needed when only containerd is used.
cd /home/yanglei1 && \
  wget http://beijing.xstore.qihoo.net/hbox2/Ascend-docker-runtime_5.0.RC2.2_linux-x86_64.run && \
  chmod +x Ascend-docker-runtime_5.0.RC2.2_linux-x86_64.run && \
  ./Ascend-docker-runtime_5.0.RC2.2_linux-x86_64.run --install && \
  systemctl daemon-reload && \
  systemctl restart docker
# Show the installed version.
cat /usr/local/Ascend/Ascend-Docker-Runtime/ascend_docker_runtime_install.info
800I A2 ARM 服务器环境初始化
# 系统
Ubuntu 22.04 (ARM64)
# 内核
5.15.0-25-generic
# Partitioning: build one LVM logical volume across both NVMe disks, format it
# as XFS and mount it on /data.
# WARNING: destructive - pvcreate -ff and mkfs.xfs -f wipe /dev/nvme0n1 and
# /dev/nvme1n1.
# The pquota mount option is what allows Docker's "overlay2.size" storage
# option (set in daemon.json later in this runbook) to work on /data/docker.
# mkdir -p and the grep guard make the tail of the chain re-runnable: an
# already existing /data no longer aborts before "mount -a", and the fstab
# entry is not appended twice.
apt install xfsprogs -y && \
  pvcreate /dev/nvme0n1 -ff && \
  pvcreate /dev/nvme1n1 -ff && \
  vgcreate vg01 /dev/nvme0n1 /dev/nvme1n1 && \
  lvcreate -n data -l 100%free vg01 && \
  mkfs.xfs -n ftype=1 -f /dev/mapper/vg01-data && \
  mkdir -p /data && \
  { grep -q '^/dev/mapper/vg01-data[[:space:]]' /etc/fstab || \
    echo "/dev/mapper/vg01-data /data xfs defaults,uquota,pquota 1 2" >> /etc/fstab; } && \
  mount -a
# Kernel command line update. The new command line only takes effect after
# the server is rebooted.
cp /etc/default/grub /etc/default/grub.bak
# Remove any existing GRUB_CMDLINE_LINUX= line, then append the new one as the
# last line of the file. Appending at "$" (instead of after a fixed line
# number) works no matter how many lines /etc/default/grub has.
# NOTE(review): several parameters (hpet, rdt, intremap, lapic, nouveau ...)
# look x86-specific - confirm they are wanted on this ARM host.
sed -i '/GRUB_CMDLINE_LINUX=/d' /etc/default/grub
sed -i '$a\GRUB_CMDLINE_LINUX="systemd.unified_cgroup_hierarchy=0 net.ifnames=0 biosdevname=0 rd.driver.blacklist=nouveau nouveau.modeset=0 iommu=pt,memaper=2 iommu.strict=0 iomem=relaxed iommu.passthrough=1 intremap=nosid default_hugepagesz=1G hugepagesz=1G hugepages=2 hugepagesz=2M hugepages=1024 hpet=enable clocksource=hpet rdt=l3cat,l3cdp,l2cat,l2cdp,cmt,mba,mbmtotal,mbmlocal psi=true vector=percpu cpufreq.default_governor=performance processor.max_cstate=0 ipcmni_extend lapic gbpages quiet"' /etc/default/grub
update-grub
# Load br_netfilter now, and register it for loading at boot so the
# net.bridge.* sysctl keys still exist after the reboot required above.
modprobe br_netfilter
echo br_netfilter > /etc/modules-load.d/br_netfilter.conf
# Kernel network settings: enable IPv4 forwarding, make bridged traffic
# visible to iptables/ip6tables, and disable IPv6 on all interfaces.
# The net.bridge.* keys only exist while br_netfilter is loaded, so load it
# first (harmless if it is already loaded).
modprobe br_netfilter
# The settings go into a dedicated drop-in that is overwritten on every run,
# instead of being appended to /etc/sysctl.conf, so re-running this step does
# not pile up duplicate entries.
cat > /etc/sysctl.d/99-ascend-node.conf <<'EOF'
net.ipv4.ip_forward=1
net.bridge.bridge-nf-call-iptables=1
net.bridge.bridge-nf-call-ip6tables=1
net.ipv6.conf.all.disable_ipv6=1
net.ipv6.conf.default.disable_ipv6=1
net.ipv6.conf.lo.disable_ipv6=1
EOF
# Apply the settings to the running kernel; this replaces the separate
# "sysctl -w" calls for the three IPv6 keys.
sysctl -p /etc/sysctl.d/99-ascend-node.conf
# Packages and directory required by lxcfs.
apt-get update \
  && apt-get install -y \
       libfuse-dev \
       fuse \
       libpam0g-dev
mkdir -p /var/lib/lxc/lxcfs/
# Create the MindX DL log directories.
# "install -d" creates the directory when missing and (re)applies mode and
# ownership when it already exists, so this step can be re-run; a plain
# "mkdir" fails once the directory is there.
install -d -m 755 -o root -g root /var/log/mindx-dl
# The component directories are owned by hwMindX. This runbook does not create
# that account, so check for it and report clearly instead of failing in chown.
if id hwMindX >/dev/null 2>&1; then
  install -d -m 750 -o hwMindX -g hwMindX /var/log/mindx-dl/devicePlugin
  install -d -m 750 -o hwMindX -g hwMindX /var/log/mindx-dl/npu-exporter
else
  echo "user hwMindX does not exist: create it, then re-run this step" >&2
fi
# Install Docker CE from Docker's official apt repository.
sudo apt install apt-transport-https ca-certificates curl software-properties-common -y
# --yes lets gpg overwrite an existing keyring, so re-running this step does
# not stop at an interactive "File exists. Overwrite?" prompt.
curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo gpg --yes --dearmor -o /usr/share/keyrings/docker-archive-keyring.gpg
# The architecture is taken from dpkg (arm64 on this host) rather than
# hard-coded, so the same line also works on other architectures.
echo "deb [arch=$(dpkg --print-architecture) signed-by=/usr/share/keyrings/docker-archive-keyring.gpg] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable" | sudo tee /etc/apt/sources.list.d/docker.list > /dev/null
sudo apt update
sudo apt install docker-ce docker-ce-cli containerd.io -y
# Start Docker automatically at boot.
sudo systemctl enable docker
# Write the Docker daemon configuration (tee also echoes it to stdout), make
# sure the configured data root exists, then restart Docker to apply it.
# The here-document delimiter is quoted: the JSON is written literally.
sudo tee /etc/docker/daemon.json <<'EOF'
{
"registry-mirrors": ["https://8dexs4ag.mirror.aliyuncs.com"],
"insecure-registries": ["r.addops.soft.360.cn"],
"storage-driver": "overlay2",
"storage-opts": [
"overlay2.size=50G"
],
"log-opts": {
"max-file": "3",
"max-size": "500m"
},
"iptables": true,
"live-restore": true,
"init": true,
"data-root": "/data/docker",
"selinux-enabled": false
}
EOF
mkdir -p /data/docker
systemctl daemon-reload
systemctl restart docker
# Replace the containerd configuration (skip this step when the CRI is docker).
# The existing config.toml is kept as config.toml.bak.
cd /home/yanglei1 \
  && wget http://beijing.xstore.qihoo.net/hbox2-tools/ascend-arm-config.toml \
  && mv /etc/containerd/config.toml /etc/containerd/config.toml.bak \
  && cp -f ascend-arm-config.toml /etc/containerd/config.toml
systemctl daemon-reload \
  && systemctl restart containerd
# Install nerdctl: download the archive, unpack it into /usr/local/bin, then
# list the containers in the k8s.io namespace as a quick check.
cd /data \
  && wget http://beijing.xstore.qihoo.net/hbox2-tools/nerdctl-1.7.7-linux-arm64.tar.gz \
  && tar -C /usr/local/bin -xzvvf nerdctl-1.7.7-linux-arm64.tar.gz \
  && nerdctl -n k8s.io ps
# Driver and firmware installation.
# Download and unpack the package bundle.
cd /data/ \
  && wget http://beijing.xstore.qihoo.net/hbox2-tools/800I-A2-pkg.tar.gz \
  && tar -xf 800I-A2-pkg.tar.gz
# Enter the bundle, make its files executable and install the driver.
cd 800I-A2-pkg \
  && chmod +x * \
  && ./Ascend-hdk-910b-npu-driver_24.1.rc2_linux-aarch64.run --full --install-for-all
# Check the driver.
npu-smi info
# Install the firmware.
./Ascend-hdk-910b-npu-firmware_7.3.0.1.231.run --full
# Check the firmware versions.
/usr/local/Ascend/driver/tools/upgrade-tool --device_index -1 --component -1 --version
# Reboot the server.
# Install CANN.
# Python prerequisites, fetched from the Aliyun PyPI mirror.
apt install -y python3-pip
pip3 install \
  --index-url http://mirrors.aliyun.com/pypi/simple/ \
  --trusted-host mirrors.aliyun.com \
  attrs cython numpy==1.24 decorator sympy cffi pyyaml pathlib2 psutil protobuf==3.20 scipy requests absl-py
# Install the toolkit from the unpacked bundle.
cd /data/800I-A2-pkg \
  && ./Ascend-cann-toolkit_8.0.RC2.2_linux-aarch64.run --install --quiet
# Load the toolkit environment into the current shell.
. /usr/local/Ascend/ascend-toolkit/set_env.sh
# Install the kernels package (run from the bundle directory entered above).
./Ascend-cann-kernels-910b_8.0.RC2.2_linux.run --install --quiet
# Install Ascend Docker Runtime.
# The steps are chained so a failed download never leads to running a missing
# or stale installer.
# NOTE(review): Docker is restarted afterwards so it picks up the newly
# installed runtime, as described in the Ascend Docker Runtime installation
# guide; the last Docker restart in this runbook happens before this step.
# Confirm for this host - it is not needed when only containerd is used.
cd /home/yanglei1 && \
  wget http://beijing.xstore.qihoo.net/hbox2-tools/Ascend-docker-runtime_6.0.RC2_linux-aarch64.run && \
  chmod +x Ascend-docker-runtime_6.0.RC2_linux-aarch64.run && \
  ./Ascend-docker-runtime_6.0.RC2_linux-aarch64.run --install && \
  systemctl daemon-reload && \
  systemctl restart docker
# Show the installed version.
cat /usr/local/Ascend/Ascend-Docker-Runtime/ascend_docker_runtime_install.info