Related documentation
- ILM (Index lifecycle management)
- data stream
- logstash grok plugins
Create an index lifecycle policy
PUT _ilm/policy/dawn_policy
{
  "policy": {
    "phases": {
      "hot": {
        "min_age": "10s",
        "actions": {
          "rollover": {
            "max_age": "5s",
            "max_primary_shard_docs": 2,
            "max_docs": 2,
            "max_primary_shard_size": "10mb"
          },
          "set_priority": {
            "priority": 100
          }
        }
      },
      "warm": {
        "min_age": "20s",
        "actions": {
          "readonly": {},
          "set_priority": {
            "priority": 50
          }
        }
      },
      "cold": {
        "min_age": "30s",
        "actions": {
          "set_priority": {
            "priority": 0
          }
        }
      },
      "delete": {
        "min_age": "40s",
        "actions": {
          "delete": {}
        }
      }
    }
  }
}
Note: adjust the rollover trigger conditions and each phase's minimum age (min_age) to suit your needs. Very short durations are used here so the effect shows up quickly. By default, Elasticsearch checks whether indices meet their ILM policy conditions every 10 minutes; the following API call lowers the poll interval to 1 second (note that transient settings do not survive a full cluster restart). The default value is recommended for production.
PUT /_cluster/settings
{
  "transient": {
    "indices.lifecycle.poll_interval": "1s"
  }
}
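To watch the policy move through its phases, the ILM explain API reports the current phase and action for each backing index. A quick check; the pattern below matches the data streams created later in this article:
GET logs-dawn-*/_ilm/explain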
Create an index template
PUT /_index_template/logs-dawn-template
{
  "index_patterns": ["logs-dawn-*"],
  "data_stream": {},
  "priority": 500,
  "template": {
    "settings": {
      "index.lifecycle.name": "dawn_policy",
      "index.number_of_replicas": "1",
      "index.number_of_shards": "6"
    },
    "mappings": {
      "properties": {
        "@timestamp":  { "type": "date" },
        "hostName":    { "type": "keyword" },
        "level":       { "type": "keyword" },
        "line":        { "type": "keyword" },
        "logger":      { "type": "text" },
        "message":     { "type": "text" },
        "pid":         { "type": "keyword" },
        "serviceName": { "type": "keyword" },
        "thread":      { "type": "keyword" },
        "traceId":     { "type": "keyword" }
      }
    }
  }
}
Note:
- "data_stream":{} enables the data stream for indices matching this template
- "index.lifecycle.name":"dawn_policy" specifies the index lifecycle policy to apply
Logstash configuration
input {
  beats {
    port => 5044
  }
}
filter {
  if [fields][serviceName] in ["dawn-user","dawn-order"] {
    grok {
      match => {
        # (?m) enables multiline matching
        # matches the logback pattern: %d{yyyy-MM-dd HH:mm:ss.SSS} [%X{traceId}] %-5.5level ${PID} --- [%15.15thread] %logger{20} %5.5line : %msg%n
        "message" => "(?m)%{TIMESTAMP_ISO8601:recordTime} \[%{DATA:traceId}\] %{LOGLEVEL:level} %{DATA:pid} --- \[%{DATA:thread}\] %{DATA:logger} %{NUMBER:line} : %{GREEDYDATA:message}"
      }
    }
  } else if [fields][serviceName] == "dawn-common" {
    grok {
      match => {
        # (?m) enables multiline matching
        # matches the logback pattern: %d{yyyy-MM-dd HH:mm:ss.SSS} %-5.5level ${PID} --- [%15.15thread] %logger{20} %5.5line : %msg%n
        "message" => "(?m)%{TIMESTAMP_ISO8601:recordTime} %{LOGLEVEL:level} %{DATA:pid} --- \[%{DATA:thread}\] %{DATA:logger} %{NUMBER:line} : %{GREEDYDATA:message}"
      }
    }
  }
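  # For reference, a hypothetical sample line for the first pattern above (not from the source):
  #   2023-05-12 08:30:15.123 [f3a9c2d1e4b5] INFO  12345 --- [           main] c.d.user.UserController    88 : user login ok
  # would yield recordTime, traceId, level, pid, thread, logger, line, and message fields;
  # the padding that logback adds is trimmed by the mutate/strip below.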
  # Use the date filter to parse the log timestamp into @timestamp
  date {
    match => ["recordTime", "yyyy-MM-dd HH:mm:ss.SSS"]
    target => "@timestamp"
    timezone => "Asia/Shanghai"
  }
  mutate {
    add_field => {
      "hostName" => "%{[host][name]}"
      "[data_stream][type]" => "logs"
      "[data_stream][dataset]" => "dawn-%{[fields][serviceEnv]}"
      "[data_stream][namespace]" => "%{[fields][serviceName]}"
    }
    remove_field => ["host","ecs","event","agent","tags","fields","@version","input","log","recordTime"]
    strip => ["level","thread","logger","pid"]
  }
}
output {
  stdout {}
  elasticsearch {
    hosts => ["http://dawn100.dawn.com:9200","http://dawn101.dawn.com:9200","http://dawn102.dawn.com:9200"]
    data_stream => "true"
    data_stream_sync_fields => "false"
  }
}
Note: the data_stream attributes can also be configured directly in the output. In that case, variables can no longer be used to define the dataset and namespace, although you can still use conditionals on specific fields to route events to different data streams. For example:
output {
  stdout {}
  elasticsearch {
    hosts => ["http://dawn100.dawn.com:9200","http://dawn101.dawn.com:9200","http://dawn102.dawn.com:9200"]
    data_stream => "true"
    data_stream_sync_fields => "false"
    data_stream_type => "logs"
    data_stream_dataset => "dawn-dev"
    data_stream_namespace => "common-api"
  }
}
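Once events flow through the pipeline, a quick search against one of the resulting data streams confirms that the grok fields landed as mapped. The name below assumes serviceEnv is dev and serviceName is dawn-user in the Filebeat-supplied fields:
GET logs-dawn-dev-dawn-user/_search
{
  "size": 1,
  "sort": [ { "@timestamp": "desc" } ]
}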