1. 工具简介
官方简介
Orchestrator是一款开源的MySQL复制拓扑管理工具,采用go语言编写,支持MySQL主从复制拓扑关系的调整、支持MySQL主库故障自动切换、手动主从切换等功能。
Orchestrator后台依赖于MySQL或者SQLite存储元数据,能够提供Web界面展示MySQL集群的拓扑关系及实例状态,通过Web界面可更改MySQL实例的部分配置信息,同时也提供命令行和api接口,以便更加灵活的自动化运维管理。
相比于MHA,Orchestrator更加偏重于复制拓扑关系的管理,能够实现MySQL任一复制拓扑关系的调整,并在此基础上,实现MySQL高可用,另外Orchestrator自身可以部署多个节点,通过raft分布式一致性协议,保证自身的高可用。
简而言之, Orchestrator可以帮助我们实现一下几种核心功能:
- 监控mysql集群状态
- 主库宕机后,自动选择从库提升为主库
- 主从切换
- 简易的修改mysql集群拓扑结构方式
2. 本次部署思路
- 使用docker-compose进行单节点部署
- 使用集群外的mysql数据源存储Orchestrator元数据
3. 准备工作
- 已完成上一篇文章中部署的一主两从集群集群
- 准备集群外的mysql数据库, 用以存储Orchestrator元数据
4. 部署流程
4.1 创建Orchestrator存储库及账户
在集群外的数据库创建orchestrator数据库, 并创建对应权限的账户
create user 'orchestrator'@'%' identified by '123456';
grant all privileges on orchestrator.* to "orchestrator"@'%';
4.2 创建集群监控账户
在主库创建用于Orchestrator监控数据库的账户, 从库会自动同步(建议权限给大一些,其他系统也可以使用该账户进行监控)
create user 'monitor'@'%' identified by '123456';
grant all privileges on *.* to "monitor"@'%';
4.3 docker-compose配置
docker-compose.yml
version: '3'
services:
mysql:
image: "orchestrator"
container_name: orchestrator
restart: always
environment:
TZ: "Asia/Shanghai"
volumes:
- "./conf.json:/etc/orchestrator.conf.json"
ports:
- "13509:3000"
4.3 Orchestrator配置文件
配置较多, 只对重点配置进行讲解,其他使用默认即可, 完整配置文件会贴在文章末尾
##orchestrator自己使用的数据库配置
"MySQLOrchestratorHost": "10.1.12.12",
"MySQLOrchestratorPort": 13306,
"MySQLOrchestratorDatabase": "orchestrator",
"MySQLOrchestratorUser": "orchestrator",
"MySQLOrchestratorPassword": "123456",
##orchestrator监控目标数据库配置
"MySQLTopologyUser": "monitor",
"MySQLTopologyPassword": "123456",
##解析相关配置, 依据部署环境修改, 此处为不解析HostName
"HostnameResolveMethod": "none",
"MySQLHostnameResolveMethod": "",
##自动恢复规则, 配置为*意思是所有集群都自动恢复
"RecoverMasterClusterFilters": [ "*" ], "RecoverIntermediateMasterClusterFilters": [ "*" ],
4.4 运行
docker-compose up -d
观察日志无报错即可通过浏览器访问web界面,如 http://ip:port/web/clusters
进入页面后, 选择Clusters-->Discover, 输入主库ip,port; 点击Submit即可发现主库, 从库会在获取主库信息后自动获取
5. 测试
可以测试一下场景体验Orchestrator的功能:
- 关闭从库1, 观察拓扑
- 开启从库1, 观察拓扑
- 关闭主库1, 观察是否提升某个从库
- 拖动从库, 使从库提升为主库
6. 完整Orchestrator配置
{
"Debug": true,
"EnableSyslog": false,
"ListenAddress": ":3000",
"MySQLTopologyUser": "monitor",
"MySQLTopologyPassword": "lz8jYsH@tJXg!1Ek1d2u",
"MySQLTopologyCredentialsConfigFile": "",
"MySQLTopologySSLPrivateKeyFile": "",
"MySQLTopologySSLCertFile": "",
"MySQLTopologySSLCAFile": "",
"MySQLTopologySSLSkipVerify": true,
"MySQLTopologyUseMutualTLS": false,
"MySQLOrchestratorHost": "10.1.12.12",
"MySQLOrchestratorPort": 13306,
"MySQLOrchestratorDatabase": "orchestrator",
"MySQLOrchestratorUser": "orchestrator",
"MySQLOrchestratorPassword": "123456",
"MySQLOrchestratorCredentialsConfigFile": "",
"MySQLOrchestratorSSLPrivateKeyFile": "",
"MySQLOrchestratorSSLCertFile": "",
"MySQLOrchestratorSSLCAFile": "",
"MySQLOrchestratorSSLSkipVerify": true,
"MySQLOrchestratorUseMutualTLS": false,
"MySQLConnectTimeoutSeconds": 1,
"DefaultInstancePort": 13500,
"DiscoverByShowSlaveHosts": true,
"InstancePollSeconds": 5,
"DiscoveryIgnoreReplicaHostnameFilters": [
"a_host_i_want_to_ignore[.]example[.]com",
".*[.]ignore_all_hosts_from_this_domain[.]example[.]com",
"a_host_with_extra_port_i_want_to_ignore[.]example[.]com:3307"
],
"UnseenInstanceForgetHours": 240,
"SnapshotTopologiesIntervalHours": 0,
"InstanceBulkOperationsWaitTimeoutSeconds": 10,
"HostnameResolveMethod": "none",
"MySQLHostnameResolveMethod": "",
"SkipBinlogServerUnresolveCheck": true,
"ExpiryHostnameResolvesMinutes": 60,
"RejectHostnameResolvePattern": "",
"ReasonableReplicationLagSeconds": 10,
"ProblemIgnoreHostnameFilters": [],
"VerifyReplicationFilters": false,
"ReasonableMaintenanceReplicationLagSeconds": 20,
"CandidateInstanceExpireMinutes": 60,
"AuditLogFile": "",
"AuditToSyslog": false,
"RemoveTextFromHostnameDisplay": ".mydomain.com:3306",
"ReadOnly": false,
"AuthenticationMethod": "",
"HTTPAuthUser": "",
"HTTPAuthPassword": "",
"AuthUserHeader": "",
"PowerAuthUsers": [
"*"
],
"ClusterNameToAlias": {
"127.0.0.1": "test suite"
},
"ReplicationLagQuery": "",
"DetectClusterAliasQuery": "SELECT SUBSTRING_INDEX(@@hostname, '.', 1)",
"DetectClusterDomainQuery": "",
"DetectInstanceAliasQuery": "",
"DetectPromotionRuleQuery": "",
"DataCenterPattern": "[.]([^.]+)[.][^.]+[.]mydomain[.]com",
"PhysicalEnvironmentPattern": "[.]([^.]+[.][^.]+)[.]mydomain[.]com",
"PromotionIgnoreHostnameFilters": [],
"DetectSemiSyncEnforcedQuery": "",
"ServeAgentsHttp": false,
"AgentsServerPort": ":3001",
"AgentsUseSSL": false,
"AgentsUseMutualTLS": false,
"AgentSSLSkipVerify": false,
"AgentSSLPrivateKeyFile": "",
"AgentSSLCertFile": "",
"AgentSSLCAFile": "",
"AgentSSLValidOUs": [],
"UseSSL": false,
"UseMutualTLS": false,
"SSLSkipVerify": false,
"SSLPrivateKeyFile": "",
"SSLCertFile": "",
"SSLCAFile": "",
"SSLValidOUs": [],
"URLPrefix": "",
"StatusEndpoint": "/api/status",
"StatusSimpleHealth": true,
"StatusOUVerify": false,
"AgentPollMinutes": 60,
"UnseenAgentForgetHours": 6,
"StaleSeedFailMinutes": 60,
"SeedAcceptableBytesDiff": 8192,
"PseudoGTIDPattern": "",
"PseudoGTIDPatternIsFixedSubstring": false,
"PseudoGTIDMonotonicHint": "asc:",
"DetectPseudoGTIDQuery": "",
"BinlogEventsChunkSize": 10000,
"SkipBinlogEventsContaining": [],
"ReduceReplicationAnalysisCount": true,
"FailureDetectionPeriodBlockMinutes": 60,
"FailMasterPromotionOnLagMinutes": 0,
"RecoveryPeriodBlockSeconds": 3600,
"RecoveryIgnoreHostnameFilters": [],
"RecoverMasterClusterFilters": [
"*"
],
"RecoverIntermediateMasterClusterFilters": [
"*"
],
"OnFailureDetectionProcesses": [
"echo 'Detected {failureType} on {failureCluster}. Affected replicas: {countSlaves}' >> /tmp/recovery.log"
],
"PreGracefulTakeoverProcesses": [
"echo 'Planned takeover about to take place on {failureCluster}. Master will switch to read_only' >> /tmp/recovery.log"
],
"PreFailoverProcesses": [
"echo 'Will recover from {failureType} on {failureCluster}' >> /tmp/recovery.log"
],
"PostFailoverProcesses": [
"echo '(for all types) Recovered from {failureType} on {failureCluster}. Failed: {failedHost}:{failedPort}; Successor: {successorHost}:{successorPort}' >> /tmp/recovery.log"
],
"PostUnsuccessfulFailoverProcesses": [],
"PostMasterFailoverProcesses": [
"echo 'Recovered from {failureType} on {failureCluster}. Failed: {failedHost}:{failedPort}; Promoted: {successorHost}:{successorPort}' >> /tmp/recovery.log"
],
"PostIntermediateMasterFailoverProcesses": [
"echo 'Recovered from {failureType} on {failureCluster}. Failed: {failedHost}:{failedPort}; Successor: {successorHost}:{successorPort}' >> /tmp/recovery.log"
],
"PostGracefulTakeoverProcesses": [
"echo 'Planned takeover complete' >> /tmp/recovery.log"
],
"CoMasterRecoveryMustPromoteOtherCoMaster": true,
"DetachLostSlavesAfterMasterFailover": true,
"ApplyMySQLPromotionAfterMasterFailover": true,
"PreventCrossDataCenterMasterFailover": false,
"PreventCrossRegionMasterFailover": false,
"MasterFailoverDetachReplicaMasterHost": false,
"MasterFailoverLostInstancesDowntimeMinutes": 0,
"PostponeReplicaRecoveryOnLagMinutes": 0,
"OSCIgnoreHostnameFilters": [],
"GraphiteAddr": "",
"GraphitePath": "",
"GraphiteConvertHostnameDotsToUnderscores": true,
"ConsulAddress": "",
"ConsulAclToken": "",
"ConsulKVStoreProvider": "consul"
}