Elasticsearch: Logstash output to an Elasticsearch data stream

Related documentation

  1. ILM (Index lifecycle management)

    www.elastic.co/guide/en/el…

  2. data stream

    www.elastic.co/guide/en/el…

  3. Logstash grok filter plugin

    www.elastic.co/guide/en/lo…

Create the index lifecycle policy

PUT _ilm/policy/dawn_policy
{
    "policy":{
        "phases":{
            "hot":{
                "min_age":"10s",
                "actions":{
                    "rollover":{
                        "max_age":"5s",
                        "max_primary_shard_docs":2,
                        "max_docs":2,
                        "max_primary_shard_size":"10mb"
                    },
                    "set_priority":{
                        "priority":100
                    }
                }
            },
            "warm":{
                "min_age":"20s",
                "actions":{
                    "readonly":{

                    },
                    "set_priority":{
                        "priority":50
                    }
                }
            },
            "cold":{
                "min_age":"30s",
                "actions":{
                    "set_priority":{
                        "priority":0
                    }
                }
            },
            "delete":{
                "min_age":"40s",
                "actions":{
                    "delete":{

                    }
                }
            }
        }
    }
}

Note: tune the rollover trigger conditions and each phase's minimum age (min_age) for your own environment; the values here are deliberately short to demonstrate the effect quickly. By default, Elasticsearch checks whether indices match their ILM policy every 10 minutes. The API below lowers that interval to 1 second; production environments should keep the default.

PUT /_cluster/settings  
{  
    "transient": {  
        "indices.lifecycle.poll_interval": "1s"  
    }  
}
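
To confirm the policy was stored and to watch a data stream's backing indices move through the phases, the standard ILM APIs can be used. The data stream name below is a hypothetical example following the logs-{dataset}-{namespace} convention produced by the Logstash configuration later in this post; _ilm/explain resolves it to its backing indices:

GET _ilm/policy/dawn_policy

GET logs-dawn-dev-dawn-user/_ilm/explain

The _ilm/explain response reports, per backing index, the current phase, action, and step, which makes the short timings above easy to observe.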

Create the index template

PUT /_index_template/logs-dawn-template  
{
    "index_patterns":[
        "logs-dawn-*"
    ],
    "data_stream":{

    },
    "priority":500,
    "template":{
        "settings":{
            "index.lifecycle.name":"dawn_policy",
            "index.number_of_replicas":"1",
            "index.number_of_shards":"6"
        },
        "mappings":{
            "properties":{
                "@timestamp":{
                    "type":"date"
                },
                "hostName":{
                    "type":"keyword"
                },
                "level":{
                    "type":"keyword"
                },
                "line":{
                    "type":"keyword"
                },
                "logger":{
                    "type":"text"
                },
                "message":{
                    "type":"text"
                },
                "pid":{
                    "type":"keyword"
                },
                "serviceName":{
                    "type":"keyword"
                },
                "thread":{
                    "type":"keyword"
                },
                "traceId":{
                    "type":"keyword"
                }
            }
        }
    }
}

Note:

  • "data_stream":{} enables data stream mode for the matching indices
  • "index.lifecycle.name":"dawn_policy" specifies the index lifecycle policy to apply

Logstash configuration

input {
    beats {
        port => 5044
    }
}

filter {
    if [fields][serviceName] in ["dawn-user","dawn-order"] {
        grok {
            # (?m) enables multiline matching
            # logback pattern: %d{yyyy-MM-dd HH:mm:ss.SSS} [%X{traceId}] %-5.5level ${PID} --- [%15.15thread] %logger{20} %5.5line : %msg%n
            match => {
                "message" => "(?m)%{TIMESTAMP_ISO8601:recordTime} \[%{DATA:traceId}\] %{LOGLEVEL:level} %{DATA:pid} --- \[%{DATA:thread}\] %{DATA:logger} %{NUMBER:line} : %{GREEDYDATA:message}"
            }
            # replace the raw message with the extracted one instead of appending and turning it into an array
            overwrite => ["message"]
        }
    } else if [fields][serviceName] == "dawn-common" {
        grok {
            # (?m) enables multiline matching
            # logback pattern: %d{yyyy-MM-dd HH:mm:ss.SSS} %-5.5level ${PID} --- [%15.15thread] %logger{20} %5.5line : %msg%n
            match => {
                "message" => "(?m)%{TIMESTAMP_ISO8601:recordTime} %{LOGLEVEL:level} %{DATA:pid} --- \[%{DATA:thread}\] %{DATA:logger} %{NUMBER:line} : %{GREEDYDATA:message}"
            }
            overwrite => ["message"]
        }
    }

    # parse the recordTime field into @timestamp with the date filter
    date {
        match => ["recordTime", "yyyy-MM-dd HH:mm:ss.SSS"]
        target => "@timestamp"
        timezone => "Asia/Shanghai"
    }

    mutate {
        add_field => {
            "hostName" => "%{[host][name]}"
            "[data_stream][type]" => "logs"
            "[data_stream][dataset]" => "dawn-%{[fields][serviceEnv]}"
            "[data_stream][namespace]" => "%{[fields][serviceName]}"
        }
        remove_field => [ "host","ecs","event","agent","tags","fields","@version","input","log","recordTime" ]
        strip => ["level", "thread", "logger", "pid"]
    }
}

output {
    stdout {}
    elasticsearch {
        hosts => ["http://dawn100.dawn.com:9200","http://dawn101.dawn.com:9200","http://dawn102.dawn.com:9200"]
        data_stream => "true"
        data_stream_sync_fields => "false"
    }
}
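
The filter above expects [fields][serviceName] and [fields][serviceEnv] to arrive on every event from the Beats shipper. A minimal Filebeat sketch of that side, with hypothetical paths and values (not from the original setup):

filebeat.inputs:
  - type: log
    paths:
      - /var/log/dawn-user/*.log
    # these appear in Logstash as [fields][serviceName] / [fields][serviceEnv]
    fields:
      serviceName: dawn-user
      serviceEnv: dev
    # stitch stack-trace lines onto the preceding log line, matching the (?m) grok patterns
    multiline.pattern: '^\d{4}-\d{2}-\d{2}'
    multiline.negate: true
    multiline.match: after

output.logstash:
  hosts: ["dawn100.dawn.com:5044"]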

Note: the data_stream attributes can also be configured directly on the elasticsearch output, but then dataset and namespace can no longer be built from field variables; you can still route events to different data streams by wrapping outputs in conditionals on specific fields (see the sketch after the example below). For example:

output {  
    stdout {}  
    elasticsearch {  
        hosts => ["http://dawn100.dawn.com:9200","http://dawn101.dawn.com:9200","http://dawn102.dawn.com:9200"]  
        data_stream => "true"  
        data_stream_sync_fields => "false"  
        data_stream_type => "logs"  
        data_stream_dataset => "dawn-dev"  
        data_stream_namespace => "common-api"  
    }  
}
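
The conditional routing mentioned above might look like the following sketch. Note that it tests [fields][serviceName], so that field must not have been dropped by remove_field earlier in the pipeline; the hosts and names are reused from the examples above purely for illustration:

output {
    if [fields][serviceName] == "dawn-user" {
        elasticsearch {
            hosts => ["http://dawn100.dawn.com:9200"]
            data_stream => "true"
            data_stream_type => "logs"
            data_stream_dataset => "dawn-dev"
            data_stream_namespace => "dawn-user"
        }
    } else {
        elasticsearch {
            hosts => ["http://dawn100.dawn.com:9200"]
            data_stream => "true"
            data_stream_type => "logs"
            data_stream_dataset => "dawn-dev"
            data_stream_namespace => "common-api"
        }
    }
}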

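Once events are flowing, a quick search against the target data stream confirms that documents are arriving and that @timestamp was parsed correctly; the name below matches the explicit output example above:

GET logs-dawn-dev-common-api/_search
{
    "size": 1,
    "sort": [
        { "@timestamp": "desc" }
    ]
}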