Server preparation
This walkthrough uses Ubuntu 22.04 and deploys a Ceph cluster with ceph-deploy; the Ceph release is Reef.
root@node5:~# uname -a
Linux node5 5.15.0-87-generic #97-Ubuntu SMP Mon Oct 2 21:09:21 UTC 2023 x86_64 x86_64 x86_64 GNU/Linux
OSD nodes
Five servers act as the cluster's OSD storage nodes. Each server has two networks: the public network serves client access, while the cluster network carries cluster-internal management traffic and data replication. Each server has three or more data disks.
# 192.168.146.x is the cluster network, 192.168.202.x is the public network
192.168.146.101/192.168.202.151 node1
192.168.146.102/192.168.202.152 node2
192.168.146.103/192.168.202.153 node3
192.168.146.104/192.168.202.154 node4
192.168.146.105/192.168.202.155 node5
/dev/sdb /dev/sdc /dev/sdd /dev/sde # each disk is 20 GB
Monitor (mon) nodes
Three servers run the Ceph monitor daemons; they must be reachable over the cluster's public network (the mon addresses below are on the public network).
192.168.146.101/192.168.202.151 node1
192.168.146.102/192.168.202.152 node2
192.168.146.103/192.168.202.153 node3
Manager (mgr) nodes
Two servers run the ceph-mgr daemons; like the monitors, they need to be reachable over the cluster's public network.
192.168.146.104/192.168.202.154 node4
192.168.146.105/192.168.202.155 node5
Initial server configuration
Time synchronization
Ceph is sensitive to clock skew, so the clocks on all servers must be kept in sync.
# set the timezone to Asia/Shanghai
timedatectl set-timezone Asia/Shanghai
# install chrony for time synchronization
apt install chrony -y
# enable and start chrony
systemctl enable chrony --now
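To confirm that chrony is actually keeping the clocks in sync, a quick check on each node (standard chronyc subcommands) looks like this:
# list the configured time sources and show the current sync status
chronyc sources -v
chronyc tracking
timedatectl status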
Configure hosts resolution
Edit /etc/hosts on every node and add the following entries:
192.168.202.151 node1
192.168.202.152 node2
192.168.202.153 node3
192.168.202.154 node4
192.168.202.155 node5
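The same entries have to be present on every node. Once the SSH key authentication set up below is in place, a small loop from node1 can distribute the file; this is just a convenience sketch:
# copy the hosts file from node1 to the remaining nodes
for n in node2 node3 node4 node5; do
  scp /etc/hosts root@$n:/etc/hosts
done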
Configure a Ceph mirror in China (Tsinghua mirror)
Add the release key
wget -q -O- 'https://mirrors.tuna.tsinghua.edu.cn/ceph/keys/release.asc' | sudo apt-key add -
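apt-key is deprecated on Ubuntu 22.04; the command above still works but prints a warning. If you prefer the newer keyring layout, a roughly equivalent alternative (the ceph.gpg filename is just an example) is:
# store the Ceph release key as a binary keyring instead of using apt-key
wget -q -O- 'https://mirrors.tuna.tsinghua.edu.cn/ceph/keys/release.asc' | \
  gpg --dearmor | sudo tee /etc/apt/trusted.gpg.d/ceph.gpg > /dev/null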
Add the repository
sudo apt-add-repository "deb https://mirrors.tuna.tsinghua.edu.cn/ceph/debian-reef $(lsb_release -sc) main" -y
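If apt-add-repository did not refresh the package index by itself, refresh it before installing anything from the new repository:
sudo apt-get update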
SSH key authentication
In this example node1 is used as the ceph-deploy (admin) node.
Generate a key pair
root@node1:~# ssh-keygen -t rsa
Copy the public key to every node (including node1 itself)
ssh-copy-id root@node1
ssh-copy-id root@node2
ssh-copy-id root@node3
ssh-copy-id root@node4
ssh-copy-id root@node5
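A quick way to verify that passwordless login works on every node is to run a trivial command over SSH; each line should print a hostname without prompting for a password:
for n in node1 node2 node3 node4 node5; do
  ssh root@$n hostname
done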
Install ceph-deploy
Create a working directory
ceph-deploy writes its generated files (configuration, keyrings, logs) to the current directory, so run all later ceph-deploy commands from this directory.
root@node1:~# mkdir cephcluster
root@node1:~# cd cephcluster/
Install ceph-deploy
# install python3 and pip
apt-get install python3 python3-pip -y
# clone ceph-deploy
git clone https://github.com/ceph/ceph-deploy.git
cd ceph-deploy
# to have ceph-deploy install Reef directly, the following file needs to be modified
# vim /root/cephcluster/ceph-deploy/ceph_deploy/install.py
# change args.release = 'nautilus' to args.release = 'reef'
pip3 install setuptools
python3 setup.py install
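The edit to install.py described in the comments above can also be done with sed; make the change before running python3 setup.py install (or re-run the install afterwards), then confirm the tool is on the PATH:
# change the default release from nautilus to reef (run inside the ceph-deploy checkout)
sed -i "s/args.release = 'nautilus'/args.release = 'reef'/" ceph_deploy/install.py
# verify the installation
ceph-deploy --version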
Run the following from the working directory.
What the new subcommand does: "Start deploying a new cluster, and write a CLUSTER.conf and keyring for it"
root@node1:~/cephcluster/ceph-deploy# ceph-deploy new node1 node2 node3 node4 node5
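If the command succeeded, three files should now exist in the working directory (these are the standard files ceph-deploy new produces):
# the cluster config, the initial monitor keyring, and the ceph-deploy log
ls ceph.conf ceph.mon.keyring ceph-deploy-ceph.log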
Edit the Ceph configuration file
root@node1:~/cephcluster/ceph-deploy# cat ceph.conf
[global]
fsid = e7f2f395-d8f8-4345-bc48-78e48868fb0f
# deploy the mons on node1, node2, and node3
mon_initial_members = node1, node2, node3
mon_host = 192.168.202.151,192.168.202.152,192.168.202.153
auth_cluster_required = cephx
auth_service_required = cephx
auth_client_required = cephx
public_network = 192.168.202.0/24
cluster_network = 192.168.146.0/24
# default replica count for new pools
osd_pool_default_size = 3
# minimum number of replicas required to serve I/O
osd_pool_default_min_size = 2
# allow up to 0.5 s of clock drift between monitors
mon_clock_drift_allowed = .50
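If ceph.conf is changed again later, after the admin step below has already distributed it, the updated file can be pushed back out to all nodes with ceph-deploy's config push subcommand:
# redistribute a modified ceph.conf to every node
ceph-deploy --overwrite-conf config push node1 node2 node3 node4 node5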
Install the required packages on every node
apt-get install -y ceph ceph-osd ceph-mds ceph-mon radosgw
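This command has to run on all five nodes; with the SSH keys set up earlier, a loop from node1 is a convenient way to do it (just a sketch of the same command):
# install the Ceph packages on every node over SSH
for n in node1 node2 node3 node4 node5; do
  ssh root@$n "apt-get install -y ceph ceph-osd ceph-mds ceph-mon radosgw"
done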
Initialize the monitors and gather the keys
ceph-deploy mon create-initial
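Besides forming the initial monitor quorum, this step gathers the bootstrap keyrings into the working directory; you can list them to confirm:
# keyrings gathered by 'mon create-initial'; typically includes ceph.client.admin.keyring
# plus the bootstrap-osd/mds/mgr/rgw keyrings
ls *.keyring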
Use ceph-deploy to copy the configuration file and admin keyring to the admin node and all Ceph nodes
ceph-deploy admin node1 node2 node3 node4 node5
Deploy the mgr service
ceph-deploy mgr create node4 node5
Now check the state of the Ceph cluster
root@node1:~/cephcluster/ceph-deploy# ceph status
  cluster:
    id:     e7f2f395-d8f8-4345-bc48-78e48868fb0f
    health: HEALTH_WARN
            mons are allowing insecure global_id reclaim  # this warning means the cluster still permits insecure global_id reclaim

  services:
    mon: 3 daemons, quorum node1,node2,node3 (age 4m)
    mgr: node4(active, since 9s), standbys: node5
    osd: 0 osds: 0 up, 0 in

  data:
    pools:   0 pools, 0 pgs
    objects: 0 objects, 0 B
    usage:   0 B used, 0 B / 0 B avail
    pgs:
Fix the "mons are allowing insecure global_id reclaim" warning
root@node1:~/cephcluster/ceph-deploy# ceph config set mon auth_allow_insecure_global_id_reclaim false
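The setting can be read back to confirm it took effect:
# should now report 'false'
ceph config get mon auth_allow_insecure_global_id_reclaim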
Check the status again: the global_id warning is gone (the remaining warning just reflects that no OSDs have been added yet)
root@node1:~/cephcluster/ceph-deploy# ceph status
  cluster:
    id:     e7f2f395-d8f8-4345-bc48-78e48868fb0f
    health: HEALTH_WARN
            OSD count 0 < osd_pool_default_size 1

  services:
    mon: 3 daemons, quorum node1,node2,node3 (age 19m)
    mgr: node4(active, since 15m), standbys: node5
    osd: 0 osds: 0 up, 0 in

  data:
    pools:   0 pools, 0 pgs
    objects: 0 objects, 0 B
    usage:   0 B used, 0 B / 0 B avail
    pgs:
Add OSDs
Install the base packages Ceph needs
Before adding OSDs, install the required packages on the nodes
root@node1:~/cephcluster/ceph-deploy# ceph-deploy install --no-adjust-repos --nogpgcheck node1 node2 node3 node4 node5
# parameter notes
# install: install the Ceph packages on the listed nodes
# --no-adjust-repos: do not replace the repo files on the nodes, since the Tsinghua mirror is already configured
# --nogpgcheck: skip GPG signature verification
List the disks on the nodes that will host OSDs
ceph-deploy disk list node1 node2 node3 node4 node5
Zap (wipe) the disks
ceph-deploy disk zap node1 /dev/sdb /dev/sdc /dev/sdd /dev/sde
ceph-deploy disk zap node2 /dev/sdb /dev/sdc /dev/sdd /dev/sde
ceph-deploy disk zap node3 /dev/sdb /dev/sdc /dev/sdd /dev/sde
ceph-deploy disk zap node4 /dev/sdb /dev/sdc /dev/sdd /dev/sde
ceph-deploy disk zap node5 /dev/sdb /dev/sdc /dev/sdd /dev/sde
Add the disks as OSDs
# create node1's OSDs
ceph-deploy osd create node1 --data /dev/sdb
ceph-deploy osd create node1 --data /dev/sdc
ceph-deploy osd create node1 --data /dev/sdd
ceph-deploy osd create node1 --data /dev/sde
# create node2's OSDs
ceph-deploy osd create node2 --data /dev/sdb
ceph-deploy osd create node2 --data /dev/sdc
ceph-deploy osd create node2 --data /dev/sdd
ceph-deploy osd create node2 --data /dev/sde
# create node3's OSDs
ceph-deploy osd create node3 --data /dev/sdb
ceph-deploy osd create node3 --data /dev/sdc
ceph-deploy osd create node3 --data /dev/sdd
ceph-deploy osd create node3 --data /dev/sde
# create node4's OSDs
ceph-deploy osd create node4 --data /dev/sdb
ceph-deploy osd create node4 --data /dev/sdc
ceph-deploy osd create node4 --data /dev/sdd
ceph-deploy osd create node4 --data /dev/sde
# create node5's OSDs
ceph-deploy osd create node5 --data /dev/sdb
ceph-deploy osd create node5 --data /dev/sdc
ceph-deploy osd create node5 --data /dev/sdd
ceph-deploy osd create node5 --data /dev/sde
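Since the zap and create commands follow the same pattern on every node, they can also be driven by a loop from the ceph-deploy node; this is just shorthand for the exact commands listed above:
# zap each data disk and create one OSD per disk on every node
for host in node1 node2 node3 node4 node5; do
  for dev in sdb sdc sdd sde; do
    ceph-deploy disk zap $host /dev/$dev
    ceph-deploy osd create $host --data /dev/$dev
  done
done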
Check the OSD processes
OSD IDs start at 0 and increase by one for every OSD added; each OSD runs as its own process and is managed independently.
root@node1:~/cephcluster/ceph-deploy# ps aux | grep osd
ceph 110288 0.5 2.6 638720 104064 ? Ssl 19:19 0:03 /usr/bin/ceph-osd -f --cluster ceph --id 0 --setuser ceph --setgroup ceph
ceph 112187 0.6 2.5 635652 102260 ? Ssl 19:20 0:03 /usr/bin/ceph-osd -f --cluster ceph --id 1 --setuser ceph --setgroup ceph
ceph 114576 0.7 2.5 634624 102396 ? Ssl 19:21 0:03 /usr/bin/ceph-osd -f --cluster ceph --id 2 --setuser ceph --setgroup ceph
ceph 116428 0.7 2.5 634620 99660 ? Ssl 19:22 0:03 /usr/bin/ceph-osd -f --cluster ceph --id 3 --setuser ceph --setgroup ceph
root 120105 0.0 0.0 6476 2260 pts/1 S+ 19:28 0:00 grep --color=auto osd
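The process list above only shows the four OSD daemons running on node1; the cluster-wide layout of all 20 OSDs can be inspected with:
# show every OSD, the host it lives on, its weight, and its up/in status
ceph osd tree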
Check the current cluster status
root@node1:~/cephcluster/ceph-deploy# ceph status
  cluster:
    id:     e7f2f395-d8f8-4345-bc48-78e48868fb0f
    health: HEALTH_WARN
            1 pool(s) have no replicas configured  # raised because the pool's replica count is 1

  services:
    mon: 3 daemons, quorum node1,node2,node3 (age 51m)
    mgr: node4(active, since 47m), standbys: node5
    osd: 20 osds: 20 up (since 8s), 20 in (since 16s)

  data:
    pools:   1 pools, 1 pgs
    objects: 2 objects, 449 KiB
    usage:   546 MiB used, 399 GiB / 400 GiB avail
    pgs:     1 active+clean
Fix the "1 pool(s) have no replicas configured" warning
List all current pools
root@node1:~/cephcluster/ceph-deploy# ceph osd pool ls
.mgr
Adjust the pool's replica size
root@node1:~/cephcluster/ceph-deploy# ceph osd pool set .mgr size 3
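Optionally set min_size as well, and read the values back to confirm the change (size and min_size are standard pool options):
# require at least 2 replicas to serve I/O on the .mgr pool
ceph osd pool set .mgr min_size 2
# read the settings back
ceph osd pool get .mgr size
ceph osd pool get .mgr min_size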
Check the cluster status once more
root@node1:~/cephcluster/ceph-deploy# ceph status
  cluster:
    id:     e7f2f395-d8f8-4345-bc48-78e48868fb0f
    health: HEALTH_OK

  services:
    mon: 3 daemons, quorum node1,node2,node3 (age 63m)
    mgr: node4(active, since 59m), standbys: node5
    osd: 20 osds: 20 up (since 12m), 20 in (since 12m)

  data:
    pools:   1 pools, 1 pgs
    objects: 2 objects, 449 KiB
    usage:   548 MiB used, 399 GiB / 400 GiB avail
    pgs:     1 active+clean
Clean up the nodes
If something goes wrong during deployment, the following commands reset the installation so you can start over
ceph-deploy forgetkeys
ceph-deploy purge node1 node2 node3 node4 node5
ceph-deploy purgedata node1 node2 node3 node4 node5
rm ceph.*
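Note that purge and purgedata do not wipe the LVM volumes that ceph-volume created on the data disks. When redeploying onto the same disks, wipe them on each OSD node first, for example with ceph-volume (while the Ceph packages are still installed), or by re-running the earlier ceph-deploy disk zap step after reinstalling:
# destroy the LVM volumes left behind on a data disk; repeat for sdc, sdd and sde
ceph-volume lvm zap --destroy /dev/sdb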