MySQL: Collecting Slow Logs


1. Introduction

For cost reasons, we replaced ELK with Doris, which is sufficient for our database log needs.

Collection pipeline: filebeat -> redis cluster -> logstash -> doris cluster -> visualization

Collection pipeline diagram: (image)

Required tools:

filebeat:apache-doris-releases.oss-accelerate.aliyuncs.com/filebeat-do…

redis:redis.io/downloads/

logstash (version must be lower than 8): www.elastic.co/downloads/p…

doris:doris.apache.org/zh-CN/downl…

Installation and deployment are not covered here; consult the respective guides.

2. Configuration

Slow log format
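The formats below assume the slow query log is already enabled. A minimal sketch for turning it on at runtime (the threshold and file path are illustrative; the path should match the filebeat paths configured later in this section):

-- enable the slow query log at runtime (illustrative values)
SET GLOBAL slow_query_log = ON;
SET GLOBAL long_query_time = 1;  -- seconds; statements slower than this get logged
SET GLOBAL slow_query_log_file = '/db/3306/logs/mysql-slow.log';  -- hypothetical path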

# Format 1: remote connection =================================================
# Time: 2024-10-15T03:23:01.439080Z
# User@Host: Admin[Admin] @  [127.0.0.1]  Id: 43328
# Query_time: 0.003293  Lock_time: 0.000099 Rows_sent: 0  Rows_examined: 139
SET timestamp=1728962581;
SELECT
  CONCAT(table_schema, '/', table_name), engine
FROM information_schema.tables
WHERE engine NOT IN (
  'MyISAM', 'InnoDB', 'CSV', 'MRG_MYISAM'
)
AND table_schema NOT IN (
  'performance_schema', 'information_schema',  'mysql'
);

# Format 2: local connection =================================================
# Time: 2025-01-02T07:59:58.535689Z
# User@Host: root[root] @ localhost []  Id: 40958
# Query_time: 0.037491  Lock_time: 0.000151 Rows_sent: 0  Rows_examined: 115
SET timestamp=1735804798;
SELECT CONCAT(table_schema, '/', table_name), engine FROM information_schema.tables WHERE engine NOT IN ( 'MyISAM', 'InnoDB', 'CSV', 'MRG_MYISAM' ) AND table_schema NOT IN ( 'performance_schema', 'information_schema', 'mysql' );


# Format 3: with a use statement =================================================
# Time: 2024-10-18T08:26:24.866189Z
# User@Host: root[root] @ localhost []  Id: 1051376
# Query_time: 2.000137  Lock_time: 0.000000 Rows_sent: 1  Rows_examined: 0
use mysql;
SET timestamp=1729239984;
select sleep(2);

# Format 4: throttle entries, not matched yet =====================================
# These appear when the log_throttle_queries_not_using_indexes parameter suppresses output.
# If you don't need to collect them either, add a drop filter to discard the whole event (see the sketch after this example).
# Time: 170526 11:26:10
# User@Host: [] @ [] Id: 38
# Query_time: 0.021872 Lock_time: 0.008620 Rows_sent: 0 Rows_examined: 0 
SET timestamp=1495769170; 
throttle: 14 'index not used' warning(s) suppressed.;
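A minimal sketch of that drop filter, assuming the throttle lines always contain the suppressed-warning text shown above (it would go inside logstash's filter {} block, before the grok):

# discard throttle events instead of parsing them
if [message] =~ /warning\(s\) suppressed/ {
  drop { }
}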

filebeat configuration file

# ============================== Filebeat inputs ===============================
filebeat.inputs:
- type: log
  id: mysqlshow
  enabled: true
  paths:
    - /db/*/logs/mysql*slow.log    # adjust to your slow log path
  tags: ["mysqlslowlog"]           # tag used to route these logs
  scan_frequency: 5s
  multiline.type: pattern
  multiline.pattern: "^# Time"     # marks the start of a slow log entry
  multiline.negate: true
  multiline.match: after
# ======================= Elasticsearch template setting =======================
setup.template.settings:
  index.number_of_shards: 1
processors:
# use the JavaScript script processor to replace \t in the log with spaces, avoiding JSON parsing errors
- script:
    lang: javascript
    source: >
        function process(event) {
            var msg = event.Get("message");
            msg = msg.replace(/\t/g, "  ");
            event.Put("message", msg);
        }
# ================================== Outputs ===================================
# output to Redis
output.redis:
    hosts: ["172.30.0.1:6380","172.30.0.2:6380","172.30.0.3:6380"]
    password: "123456789"
    db: 0    # cluster mode only has db 0
    key: "mysqlslowlog"
    timeout: 10
    datatype: "list"
# ================================== Logging ===================================
logging.level: info
logging.to_file: true
logging.files.path: /usr/local/mysql-toolkits/filebeat-doris/logs
logging.files.keepfiles: 3
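
Once filebeat is running, a quick way to confirm events are reaching Redis is to check the length of the list it writes to, using the key and credentials from the config above (-c follows cluster redirects):

# should grow while slow queries are being logged
redis-cli -c -h 172.30.0.1 -p 6380 -a 123456789 LLEN mysqlslowlog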

logstash configuration file

input {
    redis {
        id => "elk_redis_node1"
        host => "172.30.0.1"
        port => "6380"
        password => "123456789"
        db => "0"
        data_type => "list"
        key => "mysqlslowlog"
    }
    redis {
        id => "elk_redis_node2"
        host => "172.30.0.2"
        port => "6380"
        password => "123456789"
        db => "0"
        data_type => "list"
        key => "mysqlslowlog"
    }
    redis {
        id => "elk_redis_node3"
        host => "172.30.0.3"
        port => "6380"
        password => "123456789"
        db => "0"
        data_type => "list"
        key => "mysqlslowlog"
    }
}
filter {
    grok {
        # the match list tries three slow log formats in order: 1. remote connection, 2. local connection, 3. with a "use db" statement
        # remote connections do not record the use statement even when one was issued
        # for mysql 5.7
        match => [
          "message","(?m)^#\sTime:\s%{TIMESTAMP_ISO8601:exec_time}.*#\s+User@Host:\s+%{USER:query_user}\[[^\[\]]+\]\s+@\s+\[%{HOSTNAME:query_host}\]\s*Id:\s+%{NUMBER:id:int}.*#\s+Query_time:\s+%{NUMBER:query_time:float}\s+Lock_time:\s+%{NUMBER:lock_time:float}\s+Rows_sent:\s+%{NUMBER:rows_sent:int}\s+Rows_examined:\s+%{NUMBER:rows_examined:int}.*SET\s+timestamp=%{NUMBER:exec_timestamp};\s*%{GREEDYDATA:exec_sql}",
"message","(?m)^#\sTime:\s%{TIMESTAMP_ISO8601:exec_time}.*#\s+User@Host:\s+%{USER:query_user}\[[^\[\]]+\]\s+@\s%{HOSTNAME:query_host}\s+\[\]\s*Id:\s+%{NUMBER:id:int}.*#\s+Query_time:\s+%{NUMBER:query_time:float}\s+Lock_time:\s+%{NUMBER:lock_time:float}\s+Rows_sent:\s+%{NUMBER:rows_sent:int}\s+Rows_examined:\s+%{NUMBER:rows_examined:int}.*SET\s+timestamp=%{NUMBER:exec_timestamp};\s*%{GREEDYDATA:exec_sql}",
"message","(?m)^#\sTime:\s%{TIMESTAMP_ISO8601:exec_time}.*#\s+User@Host:\s+%{USER:query_user}\[[^\[\]]+\]\s+@\s+\[%{HOSTNAME:query_host}\]\s*Id:\s+%{NUMBER:id:int}.*#\s+Query_time:\s+%{NUMBER:query_time:float}\s+Lock_time:\s+%{NUMBER:lock_time:float}\s+Rows_sent:\s+%{NUMBER:rows_sent:int}\s+Rows_examined:\s+%{NUMBER:rows_examined:int}.*(?:use %{DATA:database};\s*)SET\s+timestamp=%{NUMBER:exec_timestamp};\s*%{GREEDYDATA:exec_sql}",
"message","(?m)^#\sTime:\s%{TIMESTAMP_ISO8601:exec_time}.*#\s+User@Host:\s+%{USER:query_user}\[[^\[\]]+\]\s+@\s%{HOSTNAME:query_host}\s+\[\]\s*Id:\s+%{NUMBER:id:int}.*#\s+Query_time:\s+%{NUMBER:query_time:float}\s+Lock_time:\s+%{NUMBER:lock_time:float}\s+Rows_sent:\s+%{NUMBER:rows_sent:int}\s+Rows_examined:\s+%{NUMBER:rows_examined:int}.*(?:use %{DATA:database};\s*)SET\s+timestamp=%{NUMBER:exec_timestamp};\s*%{GREEDYDATA:exec_sql}",
"message","(?m)^#\sTime:\s%{TIMESTAMP_ISO8601:exec_time}.*#\s+User@Host:\s+%{USER:query_user}\[[^\[\]]+\]\s+@\s%{HOSTNAME:query_ng_host}\s+\[%{HOSTNAME:query_host}\]\s*Id:\s+%{NUMBER:id:int}.*#\s+Query_time:\s+%{NUMBER:query_time:float}\s+Lock_time:\s+%{NUMBER:lock_time:float}\s+Rows_sent:\s+%{NUMBER:rows_sent:int}\s+Rows_examined:\s+%{NUMBER:rows_examined:int}.*(?:use %{DATA:database};\s*)SET\s+timestamp=%{NUMBER:exec_timestamp};\s*%{GREEDYDATA:exec_sql}"
          ]
    }
  grok {
    # extract the MySQL port from the slow log file path; patterns are tried in order
    match => {
      "[log][file][path]" => [
        "/db/%{POSINT:query_port}/logs/mysql_slow.log",
        "/db/%{POSINT:query_port}/logs/mysql-slow.log",
        "/db[^/]+/%{POSINT:query_port}/logs/mysql-slow.log"
      ]
    }
  }

  mutate {
    add_field => {
      "mysql_addr" => "%{[host][name]}:%{query_port}"
    }
    remove_field => ["message"]
  }
  # if grok did not extract exec_time, fall back to @timestamp
  if ![exec_time]{
    mutate {
        copy => { "@timestamp" => "exec_time" }
    }
  }
}

output {
  # route only events tagged mysqlslowlog to doris
  if [tags][0] == "mysqlslowlog" {
        doris {
            http_hosts => ["http://172.30.0.4:8030","http://172.30.0.5:8030","http://172.30.0.6:8030"]
            user => "root"
            password => "123456789"
            db => "dmp_db"
            table => "mysql_slow_log"
            headers => {
              "format" => "json"
              "read_json_by_line" => "true"
              "load_to_single_tablet" => "true"
            }
            mapping => {
              "mysql_addr" => "%{mysql_addr}"
              "exec_time" => "%{exec_time}"
              "query_user" => "%{query_user}"
              "query_host" => "%{query_host}"
              "query_port" => "%{query_port}"
              "query_time" => "%{query_time}"
              "lock_time" => "%{lock_time}"
              "rows_sent" => "%{rows_sent}"
              "rows_examined" => "%{rows_examined}"
              "exec_timestamp" => "%{exec_timestamp}"
              "exec_sql" => "%{exec_sql}"
            }
            log_request => true
        }
    }
    # debug output
    #stdout { codec => rubydebug }
}
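
Before starting the pipeline (section 3), the file can be syntax-checked with logstash's standard config test flag:

./bin/logstash -f ./config/mysql_show.conf --config.test_and_exit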

Doris CREATE TABLE statement

CREATE TABLE `mysql_slow_log` (
  `id` bigint NOT NULL AUTO_INCREMENT(1),
  `mysql_addr` varchar(64) NOT NULL COMMENT 'MySQL instance address',
  `exec_time` datetime NOT NULL COMMENT 'execution time',
  `query_user` varchar(32) NULL COMMENT 'query user',
  `query_host` varchar(128) NULL COMMENT 'client host',
  `query_port` int NULL COMMENT 'MySQL port (from the log path)',
  `query_time` varchar(16) NULL COMMENT 'query duration (s)',
  `lock_time` varchar(16) NULL COMMENT 'lock wait time (s)',
  `rows_sent` bigint NULL COMMENT 'rows sent',
  `rows_examined` bigint NULL COMMENT 'rows examined',
  `exec_timestamp` bigint NULL COMMENT 'execution unix timestamp',
  `exec_sql` text NULL COMMENT 'executed SQL',
  INDEX idx_mysql_addr (`mysql_addr`) USING INVERTED,
  INDEX idx_exec_time (`exec_time`) USING INVERTED,
  INDEX idx_query_user (`query_user`) USING INVERTED,
  INDEX idx_query_host (`query_host`) USING INVERTED,
  INDEX idx_query_port (`query_port`) USING INVERTED,
  INDEX idx_query_time (`query_time`) USING INVERTED,
  INDEX idx_lock_time (`lock_time`) USING INVERTED,
  INDEX idx_rows_sent (`rows_sent`) USING INVERTED,
  INDEX idx_rows_examined (`rows_examined`) USING INVERTED,
  INDEX idx_exec_sql (`exec_sql`) USING INVERTED PROPERTIES("support_phrase" = "true", "parser" = "unicode", "lower_case" = "true")
) ENGINE=OLAP
DUPLICATE KEY(`id`, `mysql_addr`)
COMMENT 'MySQL slow log'
PARTITION BY RANGE(`exec_time`)
(PARTITION p20240101 VALUES [('2023-08-01 00:00:00'), ('2024-01-01 00:00:00')),
PARTITION p20240301 VALUES [('2024-01-01 00:00:00'), ('2024-03-01 00:00:00')),
PARTITION p20240601 VALUES [('2024-03-01 00:00:00'), ('2024-06-01 00:00:00')),
PARTITION p20240901 VALUES [('2024-06-01 00:00:00'), ('2024-09-01 00:00:00')),
PARTITION p20241001 VALUES [('2024-09-01 00:00:00'), ('2024-10-01 00:00:00')),
PARTITION p20241101 VALUES [('2024-10-01 00:00:00'), ('2024-11-01 00:00:00')),
PARTITION p202411 VALUES [('2024-11-01 00:00:00'), ('2024-12-01 00:00:00')),
PARTITION p202412 VALUES [('2024-12-01 00:00:00'), ('2025-01-01 00:00:00')),
PARTITION p202501 VALUES [('2025-01-01 00:00:00'), ('2025-02-01 00:00:00')))
DISTRIBUTED BY RANDOM BUCKETS 3
PROPERTIES (
"min_load_replica_num" = "3",
"dynamic_partition.prefix" = "p",
"dynamic_partition.enable" = "true",
"dynamic_partition.time_unit" = "MONTH",
"dynamic_partition.time_zone" = "Asia/Shanghai",
"dynamic_partition.start" = "-14",
"dynamic_partition.end" = "3",
"storage_medium" = "SSD"
);
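
Once data is flowing in, a query along these lines (illustrative) surfaces the slowest statements; since query_time is stored as varchar in the DDL above, it needs a cast before sorting:

-- top 10 slowest statements over the last day
SELECT mysql_addr, query_user, query_host, query_time, rows_examined, exec_sql
FROM mysql_slow_log
WHERE exec_time >= DATE_SUB(NOW(), INTERVAL 1 DAY)
ORDER BY CAST(query_time AS DOUBLE) DESC
LIMIT 10;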

3. Startup

Start filebeat and logstash for collection and filtering

#filebeat
./filebeat-doris -c filebeat-proxysqlaudit.yml >/dev/null 2>&1 &


#logstash
./bin/logstash --path.data=/usr/local/logstash-7.8.1/mysql_show/data --path.logs=/usr/local/logstash-7.8.1/mysql_show/logs -f ./config/mysql_show.conf >/dev/null 2>&1 &
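
To confirm both processes are up and watch load results (paths assumed from the commands above; log_request => true in the doris output logs each stream load):

# check that both processes are running
ps -ef | grep -E 'filebeat-doris|logstash' | grep -v grep
# follow logstash's own log for doris load responses
tail -f /usr/local/logstash-7.8.1/mysql_show/logs/logstash-plain.log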

Collected data:

(image: sample of the collected slow log data)

4. Visualization

(images: visualization dashboards)