Because Rook had been initialized on these hosts before, deleting it and re-initializing it runs into the following problem.
Check the logs
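Before pulling logs, it helps to list the pods in the rook-ceph namespace to see which ones are unhealthy (the pod names used below are the ones from this cluster; yours will differ):
[root@master ~]# kubectl get pods -n rook-ceph
Then look at the mon and operator pods: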
[root@master ~]# kubectl logs rook-ceph-mon-a-56db84c87f-8f9n9 -n rook-ceph
debug 2023-05-02T22:52:52.338+0000 7f5add2f3700 0 log_channel(audit) log [DBG] : from='admin socket' entity='admin socket' cmd='mon_status' args=[]: dispatch
debug 2023-05-02T22:52:52.338+0000 7f5add2f3700 0 log_channel(audit) log [DBG] : from='admin socket' entity='admin socket' cmd=mon_status args=[]: finished
debug 2023-05-02T22:52:54.481+0000 7f5adb0f3700 1 mon.a@0(probing) e3 handle_auth_request failed to assign global_id
debug 2023-05-02T22:52:54.834+0000 7f5adb0f3700 1 mon.a@0(probing) e3 handle_auth_request failed to assign global_id
debug 2023-05-02T22:52:54.849+0000 7f5ad98f0700 -1 mon.a@0(probing) e3 get_health_metrics reporting 392 slow ops, oldest is log(1 entries from seq 1 at 2023-05-02T22:19:52.339629+0000)
debug 2023-05-02T22:52:55.375+0000 7f5adb0f3700 1 mon.a@0(probing) e3 handle_auth_request failed to assign global_id
debug 2023-05-02T22:52:56.328+0000 7f5adb0f3700 1 mon.a@0(probing) e3 handle_auth_request failed to assign global_id
debug 2023-05-02T22:52:57.481+0000 7f5adb0f3700 1 mon.a@0(probing) e3 handle_auth_request failed to assign global_id
debug 2023-05-02T22:52:57.835+0000 7f5adb0f3700 1 mon.a@0(probing) e3 handle_auth_request failed to assign global_id
debug 2023-05-02T22:52:58.386+0000 7f5adb0f3700 1 mon.a@0(probing) e3 handle_auth_request failed to assign global_id
debug 2023-05-02T22:52:59.338+0000 7f5adb0f3700 1 mon.a@0(probing) e3 handle_auth_request failed to assign global_id
debug 2023-05-02T22:52:59.849+0000 7f5ad98f0700 -1 mon.a@0(probing) e3 get_health_metrics reporting 392 slow ops, oldest is log(1 entries from seq 1 at 2023-05-02T22:19:52.339629+0000)
debug 2023-05-02T22:53:00.488+0000 7f5adb0f3700 1 mon.a@0(probing) e3 handle_auth_request failed to assign global_id
debug 2023-05-02T22:53:00.840+0000 7f5adb0f3700 1 mon.a@0(probing) e3 handle_auth_request failed to assign global_id
debug 2023-05-02T22:53:01.394+0000 7f5adb0f3700 1 mon.a@0(probing) e3 handle_auth_request failed to assign global_id
debug 2023-05-02T22:53:02.345+0000 7f5adb0f3700 1 mon.a@0(probing) e3 handle_auth_request failed to assign global_id
debug 2023-05-02T22:53:02.346+0000 7f5add2f3700 0 log_channel(audit) log [DBG] : from='admin socket' entity='admin socket' cmd='mon_status' args=[]: dispatch
debug 2023-05-02T22:53:02.346+0000 7f5add2f3700 0 log_channel(audit) log [DBG] : from='admin socket' entity='admin socket' cmd=mon_status args=[]: finished
debug 2023-05-02T22:53:03.482+0000 7f5adb0f3700 1 mon.a@0(probing) e3 handle_auth_request failed to assign global_id
debug 2023-05-02T22:53:03.836+0000 7f5adb0f3700 1 mon.a@0(probing) e3 handle_auth_request failed to assign global_id
debug 2023-05-02T22:53:04.389+0000 7f5adb0f3700 1 mon.a@0(probing) e3 handle_auth_request failed to assign global_id
debug 2023-05-02T22:53:04.850+0000 7f5ad98f0700 -1 mon.a@0(probing) e3 get_health_metrics reporting 394 slow ops, oldest is log(1 entries from seq 1 at 2023-05-02T22:19:52.339629+0000)
debug 2023-05-02T22:53:05.344+0000 7f5adb0f3700 1 mon.a@0(probing) e3 handle_auth_request failed to assign global_id
debug 2023-05-02T22:53:06.475+0000 7f5adb0f3700 1 mon.a@0(probing) e3 handle_auth_request failed to assign global_id
debug 2023-05-02T22:53:06.832+0000 7f5adb0f3700 1 mon.a@0(probing) e3 handle_auth_request failed to assign global_id
debug 2023-05-02T22:53:07.377+0000 7f5adb0f3700 1 mon.a@0(probing) e3 handle_auth_request failed to assign global_id
debug 2023-05-02T22:53:08.330+0000 7f5adb0f3700 1 mon.a@0(probing) e3 handle_auth_request failed to assign global_id
debug 2023-05-02T22:53:09.850+0000 7f5ad98f0700 -1 mon.a@0(probing) e3 get_health_metrics reporting 394 slow ops, oldest is log(1 entries from seq 1 at 2023-05-02T22:19:52.339629+0000)
debug 2023-05-02T22:53:12.341+0000 7f5add2f3700 0 log_channel(audit) log [DBG] : from='admin socket' entity='admin socket' cmd='mon_status' args=[]: dispatch
debug 2023-05-02T22:53:12.341+0000 7f5add2f3700 0 log_channel(audit) log [DBG] : from='admin socket' entity='admin socket' cmd=mon_status args=[]: finished
debug 2023-05-02T22:53:14.850+0000 7f5ad98f0700 -1 mon.a@0(probing) e3 get_health_metrics reporting 396 slow ops, oldest is log(1 entries from seq 1 at 2023-05-02T22:19:52.339629+0000)
debug 2023-05-02T22:53:15.072+0000 7f5adb0f3700 1 mon.a@0(probing) e3 handle_auth_request failed to assign global_id
debug 2023-05-02T22:53:15.429+0000 7f5adb0f3700 1 mon.a@0(probing) e3 handle_auth_request failed to assign global_id
debug 2023-05-02T22:53:15.970+0000 7f5adb0f3700 1 mon.a@0(probing) e3 handle_auth_request failed to assign global_id
debug 2023-05-02T22:53:16.922+0000 7f5adb0f3700 1 mon.a@0(probing) e3 handle_auth_request failed to assign global_id
debug 2023-05-02T22:53:18.064+0000 7f5adb0f3700 1 mon.a@0(probing) e3 handle_auth_request failed to assign global_id
debug 2023-05-02T22:53:18.417+0000 7f5adb0f3700 1 mon.a@0(probing) e3 handle_auth_request failed to assign global_id
debug 2023-05-02T22:53:18.973+0000 7f5adb0f3700 1 mon.a@0(probing) e3 handle_auth_request failed to assign global_id
debug 2023-05-02T22:53:19.850+0000 7f5ad98f0700 -1 mon.a@0(probing) e3 get_health_metrics reporting 396 slow ops, oldest is log(1 entries from seq 1 at 2023-05-02T22:19:52.339629+0000)
debug 2023-05-02T22:53:19.929+0000 7f5adb0f3700 1 mon.a@0(probing) e3 handle_auth_request failed to assign global_id
debug 2023-05-02T22:53:21.062+0000 7f5adb0f3700 1 mon.a@0(probing) e3 handle_auth_request failed to assign global_id
debug 2023-05-02T22:53:21.417+0000 7f5adb0f3700 1 mon.a@0(probing) e3 handle_auth_request failed to assign global_id
debug 2023-05-02T22:53:21.970+0000 7f5adb0f3700 1 mon.a@0(probing) e3 handle_auth_request failed to assign global_id
debug 2023-05-02T22:53:22.337+0000 7f5add2f3700 0 log_channel(audit) log [DBG] : from='admin socket' entity='admin socket' cmd='mon_status' args=[]: dispatch
debug 2023-05-02T22:53:22.337+0000 7f5add2f3700 0 log_channel(audit) log [DBG] : from='admin socket' entity='admin socket' cmd=mon_status args=[]: finished
debug 2023-05-02T22:53:22.924+0000 7f5adb0f3700 1 mon.a@0(probing) e3 handle_auth_request failed to assign global_id
debug 2023-05-02T22:53:24.075+0000 7f5adb0f3700 1 mon.a@0(probing) e3 handle_auth_request failed to assign global_id
debug 2023-05-02T22:53:24.427+0000 7f5adb0f3700 1 mon.a@0(probing) e3 handle_auth_request failed to assign global_id
debug 2023-05-02T22:53:24.850+0000 7f5ad98f0700 -1 mon.a@0(probing) e3 get_health_metrics reporting 398 slow ops, oldest is log(1 entries from seq 1 at 2023-05-02T22:19:52.339629+0000)
debug 2023-05-02T22:53:24.980+0000 7f5adb0f3700 1 mon.a@0(probing) e3 handle_auth_request failed to assign global_id
debug 2023-05-02T22:53:25.936+0000 7f5adb0f3700 1 mon.a@0(probing) e3 handle_auth_request failed to assign global_id
debug 2023-05-02T22:53:27.071+0000 7f5adb0f3700 1 mon.a@0(probing) e3 handle_auth_request failed to assign global_id
debug 2023-05-02T22:53:27.427+0000 7f5adb0f3700 1 mon.a@0(probing) e3 handle_auth_request failed to assign global_id
debug 2023-05-02T22:53:27.979+0000 7f5adb0f3700 1 mon.a@0(probing) e3 handle_auth_request failed to assign global_id
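The mon stays in the probing state and every handle_auth_request fails to assign a global_id, which means mon.a never joins a quorum. The mon_status dispatches in the log come from the admin socket; the same query can be run by hand inside the mon container (a sketch, assuming the admin socket sits at Ceph's default location in the Rook mon container):
[root@master ~]# kubectl -n rook-ceph exec -it rook-ceph-mon-a-56db84c87f-8f9n9 -- ceph daemon mon.a mon_status
A healthy mon reports "state": "leader" or "peon" here; one running on top of stale data from an earlier cluster keeps reporting "probing".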
[root@master ~]# kubectl logs rook-ceph-operator-6d4df78d9b-2pmnp -n rook-ceph
2023-05-02 23:23:01.177164 I | cephclient: writing config file /var/lib/rook/rook-ceph/rook-ceph.config
2023-05-02 23:23:01.177325 I | cephclient: generated admin config in /var/lib/rook/rook-ceph
2023-05-02 23:23:02.395719 I | op-mon: targeting the mon count 3
2023-05-02 23:23:02.499765 I | op-config: applying ceph settings:
[global]
mon cluster log file =
mon allow pool size one = true
mon allow pool delete = true
2023-05-02 23:23:17.501083 I | exec: exec timeout waiting for process ceph to return. Sending interrupt signal to the process
2023-05-02 23:23:17.503307 E | op-config: failed to run command ceph [config assimilate-conf -i /tmp/4265486729 -o /tmp/4265486729.out]
2023-05-02 23:23:17.503363 E | op-config: failed to open assimilate output file /tmp/4265486729.out. open /tmp/4265486729.out: no such file or directory
2023-05-02 23:23:17.503371 E | op-config: failed to apply ceph settings:
2023-05-02 23:23:17.503430 W | op-mon: failed to set Rook and/or user-defined Ceph config options before starting mons; will retry after starting mons. failed to apply default Ceph configurations: failed to set ceph config in the centralized mon configuration database; output: Cluster connection aborted: open /tmp/4265486729.out: no such file or directory
2023-05-02 23:23:17.503437 I | op-mon: checking for basic quorum with existing mons
2023-05-02 23:23:17.723801 I | op-mon: mon "a" ip is 10.98.36.162
2023-05-02 23:23:17.944696 I | op-mon: mon "b" ip is 10.100.201.130
2023-05-02 23:23:18.186950 I | op-mon: mon "c" ip is 10.103.49.58
2023-05-02 23:23:18.572718 I | op-mon: saved mon endpoints to config map map[csi-cluster-config-json:[{"clusterID":"rook-ceph","monitors":["10.98.36.162:6789","10.100.201.130:6789","10.103.49.58:6789"],"namespace":""}] data:a=10.98.36.162:6789,b=10.100.201.130:6789,c=10.103.49.58:6789 mapping:{"node":{"a":{"Name":"node1","Hostname":"node1","Address":"13.19.17.11"},"b":{"Name":"node2","Hostname":"node2","Address":"11.9.20.16"},"c":{"Name":"master","Hostname":"master","Address":"13.19.16.10"}}} maxMonId:2 outOfQuorum:]
2023-05-02 23:23:19.154818 I | cephclient: writing config file /var/lib/rook/rook-ceph/rook-ceph.config
2023-05-02 23:23:19.154988 I | cephclient: generated admin config in /var/lib/rook/rook-ceph
2023-05-02 23:23:20.359989 I | op-mon: deployment for mon rook-ceph-mon-a already exists. updating if needed
2023-05-02 23:23:20.589833 I | op-k8sutil: deployment "rook-ceph-mon-a" did not change, nothing to update
2023-05-02 23:23:20.589858 I | op-mon: waiting for mon quorum with [a b c]
2023-05-02 23:23:21.234924 I | op-mon: mons running: [a b c]
2023-05-02 23:23:41.988269 I | op-mon: mons running: [a b c]
2023-05-02 23:24:02.761942 I | op-mon: mons running: [a b c]
2023-05-02 23:24:23.521001 I | op-mon: mons running: [a b c]
2023-05-02 23:24:44.240099 I | op-mon: mons running: [a b c]
2023-05-02 23:25:05.006403 I | op-mon: mons running: [a b c]
2023-05-02 23:25:25.722755 I | op-mon: mons running: [a b c]
2023-05-02 23:25:46.483070 I | op-mon: mons running: [a b c]
2023-05-02 23:26:07.238395 I | op-mon: mons running: [a b c]
2023-05-02 23:26:27.992617 I | op-mon: mons running: [a b c]
2023-05-02 23:26:48.742360 I | op-mon: mons running: [a b c]
2023-05-02 23:27:09.496986 I | op-mon: mons running: [a b c]
2023-05-02 23:27:30.252042 I | op-mon: mons running: [a b c]
2023-05-02 23:27:51.009655 I | op-mon: mons running: [a b c]
2023-05-02 23:28:11.837820 I | op-mon: mons running: [a b c]
2023-05-02 23:28:32.641846 I | op-mon: mons running: [a b c]
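The operator sees all three mon pods running but keeps waiting for a quorum that never forms, and its earlier attempt to push settings via ceph config assimilate-conf timed out for the same reason ("Cluster connection aborted"). The endpoints it keeps reusing are the ones it saved to the mon endpoints configmap shown above; that configmap can be inspected directly (rook-ceph-mon-endpoints is the name Rook gives it):
[root@master ~]# kubectl -n rook-ceph get configmap rook-ceph-mon-endpoints -o yaml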
Solution
The mon pods are coming up against the monitor stores and keyrings left behind by the previous cluster under /var/lib/rook, so they can never authenticate each other or form quorum. Delete the data Rook generated on the hosts and re-initialize.
# Inspect the directory structure
[root@master ~]# tree /var/lib/rook
/var/lib/rook
├── mon-a
│   └── data
│       ├── external_log_to
│       ├── keyring
│       ├── kv_backend
│       ├── min_mon_release
│       └── store.db
│           ├── 000072.sst
│           ├── 000075.sst
│           ├── 000077.log
│           ├── CURRENT
│           ├── IDENTITY
│           ├── LOCK
│           ├── MANIFEST-000076
│           ├── OPTIONS-000012
│           └── OPTIONS-000079
├── mon-c
│   └── data
│       ├── external_log_to
│       ├── keyring
│       ├── kv_backend
│       └── store.db
│           ├── 000114.sst
│           ├── 000117.sst
│           ├── 000134.log
│           ├── CURRENT
│           ├── IDENTITY
│           ├── LOCK
│           ├── MANIFEST-000133
│           ├── OPTIONS-000131
│           └── OPTIONS-000136
├── mon-d
│   └── data
│       ├── external_log_to
│       ├── keyring
│       ├── kv_backend
│       ├── min_mon_release
│       └── store.db
│           ├── 000037.log
│           ├── 000039.sst
│           ├── CURRENT
│           ├── IDENTITY
│           ├── LOCK
│           ├── MANIFEST-000009
│           ├── OPTIONS-000006
│           └── OPTIONS-000012
└── rook-ceph
    ├── a6790216-24e1-47f1-9228-8887dbf073a0_ac6df81a-6b70-4eeb-ab8c-668c0f0d5661
    │   ├── block -> /dev/vdc
    │   ├── ceph_fsid
    │   ├── fsid
    │   ├── keyring
    │   ├── ready
    │   ├── require_osd_release
    │   ├── type
    │   └── whoami
    ├── client.admin.keyring
    ├── crash
    │   └── posted
    ├── log
    │   ├── ceph-mon.a.log
    │   ├── ceph-mon.c.log
    │   ├── ceph-mon.d.log
    │   ├── ceph-osd.1.log
    │   └── ceph-volume.log
    └── rook-ceph.config

14 directories, 52 files
# Delete the directory and everything in it
[root@master ~]# rm -rf /var/lib/rook
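Note that the mon-to-node mapping in the operator log places the mons on node1, node2 and master, so the leftover /var/lib/rook data has to be removed on every node, not only on master. A sketch, assuming root SSH access from master to the other nodes:
[root@master ~]# for node in node1 node2; do ssh root@$node 'rm -rf /var/lib/rook'; done
Once the hosts are clean, re-initialize the cluster. Assuming the deployment was done from the stock Rook example manifests, that is just re-applying them:
[root@master ~]# kubectl create -f crds.yaml -f common.yaml -f operator.yaml
[root@master ~]# kubectl create -f cluster.yaml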