1. Introduction
For cost reasons, we replaced ELK with Doris to meet our database log collection needs.
Collection pipeline: filebeat -> redis cluster -> logstash -> doris cluster -> visualization
Pipeline diagram:
Required tools:
filebeat: apache-doris-releases.oss-accelerate.aliyuncs.com/filebeat-do…
redis: redis.io/downloads/
logstash (version must be below 8): www.elastic.co/downloads/p…
doris: doris.apache.org/zh-CN/downl…
For installation and deployment, consult the respective tutorials on your own.
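As a prerequisite, the slow query log must be enabled on every MySQL instance being collected. A minimal sketch (the threshold and switches below are illustrative, not taken from the original environment):
-- enable slow query logging (illustrative settings)
SET GLOBAL slow_query_log = ON;
SET GLOBAL long_query_time = 1;   -- log statements slower than 1 second
SET GLOBAL log_output = 'FILE';   -- filebeat tails the log file, not a table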
2. Configuration
Slow log formats
# Format 1: remote connection =================================================
# Time: 2024-10-15T03:23:01.439080Z
# User@Host: Admin[Admin] @ [127.0.0.1] Id: 43328
# Query_time: 0.003293 Lock_time: 0.000099 Rows_sent: 0 Rows_examined: 139
SET timestamp=1728962581;
SELECT
CONCAT(table_schema, '/', table_name), engine
FROM information_schema.tables
WHERE engine NOT IN (
'MyISAM', 'InnoDB', 'CSV', 'MRG_MYISAM'
)
AND table_schema NOT IN (
'performance_schema', 'information_schema', 'mysql'
);
# Format 2: local connection =================================================
# Time: 2025-01-02T07:59:58.535689Z
# User@Host: root[root] @ localhost [] Id: 40958
# Query_time: 0.037491 Lock_time: 0.000151 Rows_sent: 0 Rows_examined: 115
SET timestamp=1735804798;
SELECT CONCAT(table_schema, '/', table_name), engine FROM information_schema.tables WHERE engine NOT IN ( 'MyISAM', 'InnoDB', 'CSV', 'MRG_MYISAM' ) AND table_schema NOT IN ( 'performance_schema', 'information_schema', 'mysql' );
# Format 3: with a use statement =================================================
# Time: 2024-10-18T08:26:24.866189Z
# User@Host: root[root] @ localhost [] Id: 1051376
# Query_time: 2.000137 Lock_time: 0.000000 Rows_sent: 1 Rows_examined: 0
use mysql;
SET timestamp=1729239984;
select sleep(2);
# Format 4: not matched by the grok patterns below =====================================
If the log_throttle_queries_not_using_indexes parameter is set, MySQL throttles "index not used" entries and writes a suppression summary like the sample below.
If you do not need to collect these either, add a drop filter to discard the whole event, as sketched after this sample.
# Time: 170526 11:26:10
# User@Host: [] @ [] Id: 38
# Query_time: 0.021872 Lock_time: 0.008620 Rows_sent: 0 Rows_examined: 0
SET timestamp=1495769170;
throttle: 14 'index not used' warning(s) suppressed.;
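A minimal sketch of such a drop filter, assuming the suppression summary always contains the "throttle:" marker shown above (it goes inside the Logstash filter block):
filter {
  # discard throttle summary events entirely (assumes the "throttle:" prefix is stable)
  if [message] =~ /throttle: \d+ 'index not used' warning/ {
    drop { }
  }
}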
Filebeat configuration file
# ============================== Filebeat inputs ===============================
filebeat.inputs:
- type: log
  id: mysqlshow
  enabled: true
  paths:
    - /db/*/logs/mysql*slow.log   # adjust to your own slow log path
  tags: ["mysqlslowlog"]          # tag consumed later by the Logstash output routing
  scan_frequency: 5s
  multiline.type: pattern
  multiline.pattern: "^# Time"    # each slow log entry starts with "# Time"
  multiline.negate: true
  multiline.match: after
# ======================= Elasticsearch template setting =======================
setup.template.settings:
  index.number_of_shards: 1
processors:
  # use the javascript script processor to replace \t in the message with spaces,
  # avoiding JSON parse errors downstream
  - script:
      lang: javascript
      source: >
        function process(event) {
            var msg = event.Get("message");
            msg = msg.replace(/\t/g, " ");
            event.Put("message", msg);
        }
# ================================== Outputs ===================================
# output to Redis
output.redis:
  hosts: ["172.30.0.1:6380","172.30.0.2:6380","172.30.0.3:6380"]
  password: "123456789"
  db: 0                           # cluster mode only has db 0
  key: "mysqlslowlog"
  timeout: 10
  datatype: "list"
# ================================== Logging ===================================
logging.level: info
logging.to_file: true
logging.files.path: /usr/local/mysql-toolkits/filebeat-doris/logs
logging.files.keepfiles: 3
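Once filebeat is running, a quick way to confirm events are reaching Redis is to check the list length, using the key and credentials from the config above:
redis-cli -h 172.30.0.1 -p 6380 -a 123456789 LLEN mysqlslowlog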
Logstash configuration file
input {
  redis {
    id => "elk_redis_node1"
    host => "172.30.0.1"
    port => "6380"
    password => "123456789"
    db => "0"
    data_type => "list"
    key => "mysqlslowlog"
  }
  redis {
    id => "elk_redis_node2"
    host => "172.30.0.2"
    port => "6380"
    password => "123456789"
    db => "0"
    data_type => "list"
    key => "mysqlslowlog"
  }
  redis {
    id => "elk_redis_node3"
    host => "172.30.0.3"
    port => "6380"
    password => "123456789"
    db => "0"
    data_type => "list"
    key => "mysqlslowlog"
  }
}
filter {
  grok {
    # The match list tries remote connections first and covers the three slow-SQL
    # layouts shown above: 1. remote connection, 2. local connection, 3. with a leading "use db;".
    # A remote connection does not log the use statement even when one was issued.
    # for mysql 5.7
    match => [
      "message","(?m)^#\sTime:\s%{TIMESTAMP_ISO8601:exec_time}.*#\s+User@Host:\s+%{USER:query_user}\[[^\[\]]+\]\s+@\s+\[%{HOSTNAME:query_host}\]\s*Id:\s+%{NUMBER:id:int}.*#\s+Query_time:\s+%{NUMBER:query_time:float}\s+Lock_time:\s+%{NUMBER:lock_time:float}\s+Rows_sent:\s+%{NUMBER:rows_sent:int}\s+Rows_examined:\s+%{NUMBER:rows_examined:int}.*SET\s+timestamp=%{NUMBER:exec_timestamp};\s*%{GREEDYDATA:exec_sql}",
      "message","(?m)^#\sTime:\s%{TIMESTAMP_ISO8601:exec_time}.*#\s+User@Host:\s+%{USER:query_user}\[[^\[\]]+\]\s+@\s%{HOSTNAME:query_host}\s+\[\]\s*Id:\s+%{NUMBER:id:int}.*#\s+Query_time:\s+%{NUMBER:query_time:float}\s+Lock_time:\s+%{NUMBER:lock_time:float}\s+Rows_sent:\s+%{NUMBER:rows_sent:int}\s+Rows_examined:\s+%{NUMBER:rows_examined:int}.*SET\s+timestamp=%{NUMBER:exec_timestamp};\s*%{GREEDYDATA:exec_sql}",
      "message","(?m)^#\sTime:\s%{TIMESTAMP_ISO8601:exec_time}.*#\s+User@Host:\s+%{USER:query_user}\[[^\[\]]+\]\s+@\s+\[%{HOSTNAME:query_host}\]\s*Id:\s+%{NUMBER:id:int}.*#\s+Query_time:\s+%{NUMBER:query_time:float}\s+Lock_time:\s+%{NUMBER:lock_time:float}\s+Rows_sent:\s+%{NUMBER:rows_sent:int}\s+Rows_examined:\s+%{NUMBER:rows_examined:int}.*(?:use %{DATA:database};\s*)SET\s+timestamp=%{NUMBER:exec_timestamp};\s*%{GREEDYDATA:exec_sql}",
      "message","(?m)^#\sTime:\s%{TIMESTAMP_ISO8601:exec_time}.*#\s+User@Host:\s+%{USER:query_user}\[[^\[\]]+\]\s+@\s%{HOSTNAME:query_host}\s+\[\]\s*Id:\s+%{NUMBER:id:int}.*#\s+Query_time:\s+%{NUMBER:query_time:float}\s+Lock_time:\s+%{NUMBER:lock_time:float}\s+Rows_sent:\s+%{NUMBER:rows_sent:int}\s+Rows_examined:\s+%{NUMBER:rows_examined:int}.*(?:use %{DATA:database};\s*)SET\s+timestamp=%{NUMBER:exec_timestamp};\s*%{GREEDYDATA:exec_sql}",
      "message","(?m)^#\sTime:\s%{TIMESTAMP_ISO8601:exec_time}.*#\s+User@Host:\s+%{USER:query_user}\[[^\[\]]+\]\s+@\s%{HOSTNAME:query_ng_host}\s+\[%{HOSTNAME:query_host}\]\s*Id:\s+%{NUMBER:id:int}.*#\s+Query_time:\s+%{NUMBER:query_time:float}\s+Lock_time:\s+%{NUMBER:lock_time:float}\s+Rows_sent:\s+%{NUMBER:rows_sent:int}\s+Rows_examined:\s+%{NUMBER:rows_examined:int}.*(?:use %{DATA:database};\s*)SET\s+timestamp=%{NUMBER:exec_timestamp};\s*%{GREEDYDATA:exec_sql}"
    ]
  }
  grok {
    match => {
      "[log][file][path]" => [
        "/db/%{POSINT:query_port}/logs/mysql_slow.log",
        "/db/%{POSINT:query_port}/logs/mysql-slow.log",
        "/db[^/]+/%{POSINT:query_port}/logs/mysql-slow.log"
      ]
    }
  }
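  # e.g. [log][file][path] = /db/3306/logs/mysql-slow.log yields query_port = 3306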
  mutate {
    add_field => {
      "mysql_addr" => "%{[host][name]}:%{query_port}"
    }
    remove_field => ["message"]
  }
  # fall back to the ingest timestamp when exec_time was not parsed
  if ![exec_time] {
    mutate {
      copy => { "@timestamp" => "exec_time" }
    }
  }
}
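An optional refinement that is not part of the original pipeline (assuming exec_time parses as ISO8601, which matches the 5.7 samples above): a date filter can promote exec_time to @timestamp, so event time reflects query execution rather than ingestion:
filter {
  # normalize the parsed execution time into the event timestamp
  date {
    match => ["exec_time", "ISO8601"]
    target => "@timestamp"
  }
}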
output {
  # route only events tagged "mysqlslowlog" to Doris
  if [tags][0] == "mysqlslowlog" {
    doris {
      http_hosts => ["http://172.30.0.4:8030","http://172.30.0.5:8030","http://172.30.0.6:8030"]
      user => "root"
      password => "123456789"
      db => "dmp_db"
      table => "mysql_slow_log"
      headers => {
        "format" => "json"
        "read_json_by_line" => "true"
        "load_to_single_tablet" => "true"
      }
      mapping => {
        "mysql_addr" => "%{mysql_addr}"
        "exec_time" => "%{exec_time}"
        "query_user" => "%{query_user}"
        "query_host" => "%{query_host}"
        "query_port" => "%{query_port}"
        "query_time" => "%{query_time}"
        "lock_time" => "%{lock_time}"
        "rows_sent" => "%{rows_sent}"
        "rows_examined" => "%{rows_examined}"
        "exec_timestamp" => "%{exec_timestamp}"
        "exec_sql" => "%{exec_sql}"
      }
      log_request => true
    }
  }
  # debug output
  #stdout { codec => rubydebug }
}
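Before an actual start, Logstash can validate the configuration syntax:
./bin/logstash -f ./config/mysql_show.conf --config.test_and_exit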
Doris table creation statement
CREATE TABLE `mysql_slow_log` (
  `id` bigint NOT NULL AUTO_INCREMENT(1),
  `mysql_addr` varchar(64) NOT NULL COMMENT 'MySQL instance address',
  `exec_time` datetime NOT NULL COMMENT 'execution time',
  `query_user` varchar(32) NULL COMMENT 'query user',
  `query_host` varchar(128) NULL COMMENT 'query host',
  `query_port` int NULL COMMENT 'query port',
  `query_time` varchar(16) NULL COMMENT 'query duration',
  `lock_time` varchar(16) NULL COMMENT 'lock time',
  `rows_sent` bigint NULL COMMENT 'rows sent',
  `rows_examined` bigint NULL COMMENT 'rows examined',
  `exec_timestamp` bigint NULL COMMENT 'execution unix timestamp',
  `exec_sql` text NULL COMMENT 'executed SQL',
  INDEX idx_mysql_addr (`mysql_addr`) USING INVERTED,
  INDEX idx_exec_time (`exec_time`) USING INVERTED,
  INDEX idx_query_user (`query_user`) USING INVERTED,
  INDEX idx_query_host (`query_host`) USING INVERTED,
  INDEX idx_query_port (`query_port`) USING INVERTED,
  INDEX idx_query_time (`query_time`) USING INVERTED,
  INDEX idx_lock_time (`lock_time`) USING INVERTED,
  INDEX idx_rows_sent (`rows_sent`) USING INVERTED,
  INDEX idx_rows_examined (`rows_examined`) USING INVERTED,
  INDEX idx_exec_sql (`exec_sql`) USING INVERTED PROPERTIES("support_phrase" = "true", "parser" = "unicode", "lower_case" = "true")
) ENGINE=OLAP
DUPLICATE KEY(`id`, `mysql_addr`)
COMMENT 'MySQL slow log'
PARTITION BY RANGE(`exec_time`)
(PARTITION p20240101 VALUES [('2023-08-01 00:00:00'), ('2024-01-01 00:00:00')),
PARTITION p20240301 VALUES [('2024-01-01 00:00:00'), ('2024-03-01 00:00:00')),
PARTITION p20240601 VALUES [('2024-03-01 00:00:00'), ('2024-06-01 00:00:00')),
PARTITION p20240901 VALUES [('2024-06-01 00:00:00'), ('2024-09-01 00:00:00')),
PARTITION p20241001 VALUES [('2024-09-01 00:00:00'), ('2024-10-01 00:00:00')),
PARTITION p20241101 VALUES [('2024-10-01 00:00:00'), ('2024-11-01 00:00:00')),
PARTITION p202411 VALUES [('2024-11-01 00:00:00'), ('2024-12-01 00:00:00')),
PARTITION p202412 VALUES [('2024-12-01 00:00:00'), ('2025-01-01 00:00:00')),
PARTITION p202501 VALUES [('2025-01-01 00:00:00'), ('2025-02-01 00:00:00')))
DISTRIBUTED BY RANDOM BUCKETS 3
PROPERTIES (
  "min_load_replica_num" = "3",
  "dynamic_partition.prefix" = "p",
  "dynamic_partition.enable" = "true",
  "dynamic_partition.time_unit" = "MONTH",
  "dynamic_partition.time_zone" = "Asia/Shanghai",
  "dynamic_partition.start" = "-14",
  "dynamic_partition.end" = "3",
  "storage_medium" = "SSD"
);
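After data starts flowing, a quick sanity query for the slowest recent statements (query_time is stored as a string, hence the cast; the one-day window is just an example):
SELECT mysql_addr, query_user, query_time, exec_sql
FROM dmp_db.mysql_slow_log
WHERE exec_time >= date_sub(now(), INTERVAL 1 DAY)
ORDER BY CAST(query_time AS DOUBLE) DESC
LIMIT 10;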
3. Startup
Start filebeat and logstash to collect and filter the logs:
#filebeat
./filebeat-doris -c filebeat-proxysqlaudit.yml >/dev/null 2>&1 &
#logstash
./bin/logstash --path.data=/usr/local/logstash-7.8.1/mysql_show/data --path.logs=/usr/local/logstash-7.8.1/mysql_show/logs -f ./config/mysql_show.conf >/dev/null 2>&1 &
Collected data:
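To confirm rows have landed, you can count them through the Doris FE MySQL protocol port (9030 is the Doris default; the host is one of the FE nodes above):
mysql -h 172.30.0.4 -P 9030 -uroot -p -e 'SELECT COUNT(*) FROM dmp_db.mysql_slow_log;'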