我正在参加「掘金·启航计划」
mongo故障模拟-权重调整
1.测试副本集是否是高可用
1.1.停掉mongodb-1:29017节点
[root@mongodb-1 /data1/mongodb_cluster]# ./mongodb/bin/mongod -f /data1/mongodb_cluster/mongodb_29017/conf/mongo_29017.conf --shutdown
killing process with pid: 57659
验证副本集是否仍然可用:查询、插入、删除操作是否正常
故障自动转移成功
1.2.开启mongodb-1:29017节点,查看是否转为primary
1.3.停掉mongodb-1:29018、mongodb-1:29017节点,查看29019节点是否成为primary
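发现29019节点没有成为primary,其余两个节点均为不可达状态(not reachable/healthy),此时整个副本集已不可用(无法写入)。原因:三节点副本集选举primary需要获得多数(至少2个)投票成员的支持,只剩1个节点存活时无法完成选举,29019只能保持SECONDARY状态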
[root@mongodb-1 /data1/mongodb_cluster]# ./mongodb/bin/mongo mongodb-1:29019
MongoDB shell version v4.0.14
connecting to: mongodb://mongodb-1:29019/test?gssapiServiceName=mongodb
Implicit session: session { "id" : UUID("8332f408-753a-4008-bd83-d8283f324fcb") }
MongoDB server version: 4.0.14
Server has startup warnings:
2022-09-15T14:40:10.842+0800 I CONTROL [initandlisten]
2022-09-15T14:40:10.842+0800 I CONTROL [initandlisten] ** WARNING: Access control is not enabled for the database.
2022-09-15T14:40:10.842+0800 I CONTROL [initandlisten] ** Read and write access to data and configuration is unrestricted.
2022-09-15T14:40:10.842+0800 I CONTROL [initandlisten] ** WARNING: You are running this process as the root user, which is not recommended.
2022-09-15T14:40:10.842+0800 I CONTROL [initandlisten]
dba01:SECONDARY>
dba01:SECONDARY>
dba01:SECONDARY>
dba01:SECONDARY>
dba01:SECONDARY>
dba01:SECONDARY>
dba01:SECONDARY> rs.status();
{
"set" : "dba01",
"date" : ISODate("2022-09-15T06:41:10.443Z"),
"myState" : 2,
"term" : NumberLong(6),
"syncingTo" : "",
"syncSourceHost" : "",
"syncSourceId" : -1,
"heartbeatIntervalMillis" : NumberLong(2000),
"optimes" : {
"lastCommittedOpTime" : {
"ts" : Timestamp(0, 0),
"t" : NumberLong(-1)
},
"appliedOpTime" : {
"ts" : Timestamp(1663223977, 1),
"t" : NumberLong(5)
},
"durableOpTime" : {
"ts" : Timestamp(1663223977, 1),
"t" : NumberLong(5)
}
},
"lastStableCheckpointTimestamp" : Timestamp(1663223977, 1),
"members" : [
{
"_id" : 0,
"name" : "mongodb-1:29017",
"health" : 0,
"state" : 8,
"stateStr" : "(not reachable/healthy)",
"uptime" : 0,
"optime" : {
"ts" : Timestamp(0, 0),
"t" : NumberLong(-1)
},
"optimeDurable" : {
"ts" : Timestamp(0, 0),
"t" : NumberLong(-1)
},
"optimeDate" : ISODate("1970-01-01T00:00:00Z"),
"optimeDurableDate" : ISODate("1970-01-01T00:00:00Z"),
"lastHeartbeat" : ISODate("2022-09-15T06:41:09.681Z"),
"lastHeartbeatRecv" : ISODate("1970-01-01T00:00:00Z"),
"pingMs" : NumberLong(0),
"lastHeartbeatMessage" : "Error connecting to mongodb-1:29017 (192.168.146.138:29017) :: caused by :: Connection refused",
"syncingTo" : "",
"syncSourceHost" : "",
"syncSourceId" : -1,
"infoMessage" : "",
"configVersion" : -1
},
{
"_id" : 1,
"name" : "mongodb-1:29018",
"health" : 0,
"state" : 8,
"stateStr" : "(not reachable/healthy)",
"uptime" : 0,
"optime" : {
"ts" : Timestamp(0, 0),
"t" : NumberLong(-1)
},
"optimeDurable" : {
"ts" : Timestamp(0, 0),
"t" : NumberLong(-1)
},
"optimeDate" : ISODate("1970-01-01T00:00:00Z"),
"optimeDurableDate" : ISODate("1970-01-01T00:00:00Z"),
"lastHeartbeat" : ISODate("2022-09-15T06:41:10.388Z"),
"lastHeartbeatRecv" : ISODate("2022-09-15T06:41:07.764Z"),
"pingMs" : NumberLong(0),
"lastHeartbeatMessage" : "Error connecting to mongodb-1:29018 (192.168.146.138:29018) :: caused by :: Connection refused",
"syncingTo" : "",
"syncSourceHost" : "",
"syncSourceId" : -1,
"infoMessage" : "",
"configVersion" : -1
},
{
"_id" : 2,
"name" : "mongodb-1:29019",
"health" : 1,
"state" : 2,
"stateStr" : "SECONDARY",
"uptime" : 60,
"optime" : {
"ts" : Timestamp(1663223977, 1),
"t" : NumberLong(5)
},
"optimeDate" : ISODate("2022-09-15T06:39:37Z"),
"syncingTo" : "",
"syncSourceHost" : "",
"syncSourceId" : -1,
"infoMessage" : "could not find member to sync from",
"configVersion" : 1,
"self" : true,
"lastHeartbeatMessage" : ""
}
],
"ok" : 1,
"operationTime" : Timestamp(1663223977, 1),
"$clusterTime" : {
"clusterTime" : Timestamp(1663223977, 1),
"signature" : {
"hash" : BinData(0,"AAAAAAAAAAAAAAAAAAAAAAAAAAA="),
"keyId" : NumberLong(0)
}
}
}
dba01:SECONDARY>
1.3.1.修复这个故障
1.启动29018
[root@mongodb-1 /data1/mongodb_cluster]# ./mongodb/bin/mongod -f /data1/mongodb_cluster/mongodb_29018/conf/mongo_29018.conf
about to fork child process, waiting until server is ready for connections.
forked process: 103563
child process started successfully, parent exiting
2.登录29019
[root@mongodb-1 /data1/mongodb_cluster]# ./mongodb/bin/mongo mongodb-1:29019
2.权重调整
2.1.查看集群配置信息
[root@mongodb-1 /data1/mongodb_cluster]# ./mongodb/bin/mongo mongodb-1:29017
dba01:PRIMARY> rs.conf()
#目前权重都是一样的
2.2.调整权重,将29019调为primary
[root@mongodb-1 /data1/mongodb_cluster]# ./mongodb/bin/mongo mongodb-1:29017
dba01:PRIMARY> config = rs.conf()
dba01:PRIMARY> config.members[2].priority=100
100
dba01:PRIMARY> rs.reconfig(config)
{
"ok" : 1,
"operationTime" : Timestamp(1663227028, 1),
"$clusterTime" : {
"clusterTime" : Timestamp(1663227028, 1),
"signature" : {
"hash" : BinData(0,"AAAAAAAAAAAAAAAAAAAAAAAAAAA="),
"keyId" : NumberLong(0)
}
}
}
dba01:PRIMARY> rs.conf()
解释说明:
config = rs.conf() 将当前副本集的配置读取到变量config中
config.members[2].priority=100:将[2]也就是29019节点的权重调整为100;如果想把29018调整为primary,将2改为1---config.members[1].priority=100
rs.reconfig(config) 将修改后的配置应用到副本集并使其生效(权重变化后会触发重新选举)
再次查看,当前登录的29017节点已变为secondary,权重最高的29019节点成为primary
2.3.主节点主动降级(rs.stepDown()不会修改权重,只是让当前primary降为secondary),触发重新选举
dba01:PRIMARY> rs.stepDown()