prometheus-nginxlog-exporter
GitHub 地址:https://github.com/martin-helmich/prometheus-nginxlog-exporter
有rpm包和tar包2种
prometheus-nginxlog-exporter_1.11.0_linux_amd64.rpm
prometheus-nginxlog-exporter_1.11.0_linux_amd64.tar.gz
1. 安装
它有多种安装方式(上面列出了 rpm 包和 tar 包),这里采用 rpm 包的方式安装,具体步骤如下:
wget https://github.com/martin-helmich/prometheus-nginxlog-exporter/releases/download/v1.11.0/prometheus-nginxlog-exporter_1.11.0_linux_amd64.rpm
[root@vms21 ~]# rpm -ivh prometheus-nginxlog-exporter_1.11.0_linux_amd64.rpm
警告:prometheus-nginxlog-exporter_1.11.0_linux_amd64.rpm: 头V4 RSA/SHA256 Signature, 密钥 ID f6b64258: NOKEY
准备中... ################################# [100%]
正在升级/安装...
1:prometheus-nginxlog-exporter-0:1.################################# [100%]
Created symlink from /etc/systemd/system/multi-user.target.wants/prometheus-nginxlog-exporter.service to /usr/lib/systemd/system/prometheus-nginxlog-exporter.service.
修改nginx配置文件
log_format custom '$remote_addr - $remote_user [$time_local] "$request" '
'$status $body_bytes_sent "$http_referer" '
'"$http_user_agent" "$http_x_forwarded_for" "$request_length" "$upstream_response_time" "$request_time" "$upstream_cache_status"';
access_log /var/log/nginx/access.log custom;
error_log /var/log/nginx/error.log;
[root@vms21 ~]# nginx -t
nginx: [warn] conflicting server name "_" on 0.0.0.0:80, ignored
nginx: the configuration file /etc/nginx/nginx.conf syntax is ok
nginx: configuration file /etc/nginx/nginx.conf test is successful
[root@vms21 ~]# systemctl start nginx
修改prometheus-nginxlog-exporter 配置文件
/etc/prometheus/nginxlog_exporter.yml
listen:
port: 4040
address: "0.0.0.0"
consul:
enable: false
namespaces:
- name: myapp
relabel_configs:
- target_label: request_uri
from: request
split: 2
separator: ' '
format: "$remote_addr - $remote_user [$time_local] \"$request\" $status $body_bytes_sent \"$http_referer\" \"$http_user_agent\" \"$http_x_forwarded_for\" \"$request_length\" \"$upstream_response_time\" \"$request_time\" \"$upstream_cache_status\""
source:
files:
- /var/log/nginx/access.log
labels:
service: "myapp"
environment: "prod"
hostname: "http://myapp.domain.com"
histogram_buckets: [.005, .01, .025, .05, .1, .25, .5, 1, 2.5, 5, 10]
修改systemd启动文件
vim /usr/lib/systemd/system/prometheus-nginxlog-exporter.service
[root@vms21 nginx]# cat /usr/lib/systemd/system/prometheus-nginxlog-exporter.service
[Unit]
Description=NGINX metrics exporter for Prometheus
After=network-online.target
[Service]
ExecStart=/usr/sbin/prometheus-nginxlog-exporter -config-file /etc/prometheus/nginxlog_exporter.yml
Restart=always
ProtectSystem=full
CapabilityBoundingSet=
[Install]
WantedBy=multi-user.target
启动
systemctl restart prometheus-nginxlog-exporter
myapp_http_upstream_time_seconds_hist_bucket{environment="prod",hostname="http://myapp.domain.com",method="GET",request_uri="/",service="myapp",status="200",le="0.005"} 1
myapp_http_upstream_time_seconds_hist_bucket{environment="prod",hostname="http://myapp.domain.com",method="GET",request_uri="/",service="myapp",status="200",le="0.01"} 1
myapp_http_upstream_time_seconds_hist_bucket{environment="prod",hostname="http://myapp.domain.com",method="GET",request_uri="/",service="myapp",status="200",le="0.025"} 1
myapp_http_upstream_time_seconds_hist_bucket{environment="prod",hostname="http://myapp.domain.com",method="GET",request_uri="/",service="myapp",status="200",le="0.05"} 1
myapp_http_upstream_time_seconds_hist_bucket{environment="prod",hostname="http://myapp.domain.com",method="GET",request_uri="/",service="myapp",status="200",le="0.1"} 1
myapp_http_upstream_time_seconds_hist_bucket{environment="prod",hostname="http://myapp.domain.com",method="GET",request_uri="/",service="myapp",status="200",le="0.25"} 1
myapp_http_upstream_time_seconds_hist_bucket{environment="prod",hostname="http://myapp.domain.com",method="GET",request_uri="/",service="myapp",status="200",le="0.5"} 1
myapp_http_upstream_time_seconds_hist_bucket{environment="prod",hostname="http://myapp.domain.com",method="GET",request_uri="/",service="myapp",status="200",le="1"} 1
myapp_http_upstream_time_seconds_hist_bucket{environment="prod",hostname="http://myapp.domain.com",method="GET",request_uri="/",service="myapp",status="200",le="2.5"} 1
myapp_http_upstream_time_seconds_hist_bucket{environment="prod",hostname="http://myapp.domain.com",method="GET",request_uri="/",service="myapp",status="200",le="5"} 1
myapp_http_upstream_time_seconds_hist_bucket{environment="prod",hostname="http://myapp.domain.com",method="GET",request_uri="/",service="myapp",status="200",le="10"} 1
myapp_http_upstream_time_seconds_hist_bucket{environment="prod",hostname="http://myapp.domain.com",method="GET",request_uri="/",service="myapp",status="200",le="+Inf"} 1
myapp_http_upstream_time_seconds_hist_sum{environment="prod",hostname="http://myapp.domain.com",method="GET",request_uri="/",service="myapp",status="200"} 0.001
myapp_http_upstream_time_seconds_hist_count{environment="prod",hostname="http://myapp.domain.com",method="GET",request_uri="/",service="myapp",status="200"} 1
数据有了
这边注意几个细节:
- namespaces 的 name 就是导出指标的前缀,所以必须符合 Prometheus 指标名称的格式。
- 关注 <namespace>_parse_errors_total 这个指标,可以查看是否存在无法解析的日志行。
- format 必须按照你的 nginx 的 log_format 来编写;如果发现没有数据,通常就是 format 有问题,这时可以查看 error log 逐步排查。
- 如果 nginx 日志中使用的变量与 prometheus-nginxlog-exporter 官方要求的变量不一致,也会导致对应的指标没有数据。
例如
- <namespace>_http_response_count_total:已处理的 HTTP 请求/响应的总量。
- <namespace>_http_response_size_bytes:传输的内容总量(以字节为单位)。
- <namespace>_http_request_size_bytes:接收的流量总量(以字节为单位)。此指标需要日志格式中包含 $request_length 变量。
- <namespace>_http_upstream_time_seconds:上游响应时间的汇总向量(以秒为单位)。需要在 NGINX 的日志格式中使用 $upstream_response_time 变量专门启用。
- <namespace>_http_upstream_time_seconds_hist:与 <namespace>_http_upstream_time_seconds 相同,但为直方图向量。同样需要日志格式中包含 $upstream_response_time 变量。
- <namespace>_http_response_time_seconds:总响应时间的汇总向量(以秒为单位)。需要在 NGINX 的日志格式中使用 $request_time 变量专门启用。
- <namespace>_http_response_time_seconds_hist:与 <namespace>_http_response_time_seconds 相同,但为直方图向量。同样需要日志格式中包含 $request_time 变量。
解释一下上面nginx变量的意思
- $upstream_connect_time – 与上游服务器建立连接所花费的时间
- $upstream_header_time – 从与上游服务器建立连接到接收响应标头第一个字节之间的时间
- $upstream_response_time – 从与上游服务器建立连接到接收响应正文最后一个字节之间的时间
- $request_time – 处理请求所花费的总时间
具体可以看下这个文档链接。
2.高级用法
日志匹配
从文件中读取
从日志文件读取时,只需要一个属性:files
单个匹配
namespace "test" {
source {
files = ["/var/log/nginx/access.log"]
// ...
}
}
范围匹配
namespace "test" {
source {
files = ["/var/log/nginx/*_access.log"]
// ...
}
}
从系统日志读取
导出器还可以打开并侦听 Syslog 端口,并从那里读取日志。配置工作原理如下:
namespace "test" {
source {
syslog {
listen_address = "udp://127.0.0.1:8514" (1)
format = "rfc3164" (2)
tags = ["nginx"] (3)
}
// ...
}
}
重新打标签
test-configuration-relabel.hcl 配置文件
--------------------------------------
port = 4040
enable_experimental = true
namespace "nginx" {
source {
files = [".behave-sandbox/access.log"]
}
format = "$remote_addr - $remote_user [$time_local] \"$request\" $status $body_bytes_sent \"$http_referer\" \"$http_user_agent\" \"$http_x_forwarded_for\""
relabel "user" {
from = "remote_user"
whitelist = ["foo", "bar"]
}
}
--------------------------------------
Feature: Config file allows relabel configurations
Scenario: Labels are added
Given a running exporter listening with configuration file "test-configuration-relabel.hcl"
When the following HTTP request is logged to "access.log"
"""
172.17.0.1 - - [23/Jun/2016:16:04:20 +0000] "GET / HTTP/1.1" 200 612 "-" "curl/7.29.0" "-"
172.17.0.1 - foo [23/Jun/2016:16:04:20 +0000] "GET / HTTP/1.1" 200 612 "-" "curl/7.29.0" "-"
"""
Then the exporter should report value 1 for metric nginx_http_response_count_total{method="GET",status="200",user="foo"}
And the exporter should report value 1 for metric nginx_http_response_count_total{method="GET",status="200",user="other"}
上面这段配置的意思是重新打标签:新建 user 标签,其来源是 remote_user 字段(nginx 的变量)。如果该字段的值是 foo 或 bar(即在 whitelist 中),user 标签就取匹配到的那个值;如果没有匹配到,user 标签的值就是 other。
匹配多个日志
listen:
port: 4040
namespaces:
- name: nginx
source:
files:
- .behave-sandbox/access-1.log
format: "$remote_addr - $remote_user [$time_local] \"$request\" $status $body_bytes_sent \"$http_referer\" \"$http_user_agent\" \"$http_x_forwarded_for\""
histogram_buckets: [.005, .01, .025, .05, .1, .25, .5, 1, 2.5, 5, 10]
- name: apache
source:
files:
- .behave-sandbox/access-2.log
format: "$remote_addr - $remote_user [$time_local] \"$request\" $status $body_bytes_sent \"$http_referer\" \"$http_user_agent\" \"$http_x_forwarded_for\""
-----------------------------
Feature: YAML Config file allows multiple namespaces
Scenario: Single request is counted
Given a running exporter listening with configuration file "test-configuration.yaml"
When the following HTTP request is logged to "access-1.log"
"""
172.17.0.1 - - [23/Jun/2016:16:04:20 +0000] "GET / HTTP/1.1" 200 612 "-" "curl/7.29.0" "-"
"""
Then the exporter should report value 1 for metric nginx_http_response_count_total{method="GET",status="200"}
Scenario: Multiple requests to different files are counted
Given a running exporter listening with configuration file "test-configuration.yaml"
When the following HTTP request is logged to "access-1.log"
"""
172.17.0.1 - - [23/Jun/2016:16:04:20 +0000] "GET / HTTP/1.1" 200 612 "-" "curl/7.29.0" "-"
"""
And the following HTTP request is logged to "access-2.log"
"""
172.17.0.1 - - [23/Jun/2016:16:04:20 +0000] "GET / HTTP/1.1" 400 612 "-" "curl/7.29.0" "-"
"""
Then the exporter should report value 1 for metric nginx_http_response_count_total{method="GET",status="200"}
And the exporter should report value 1 for metric apache_http_response_count_total{method="GET",status="400"}
可以同时匹配多个日志文件进行监控
添加自定义标签
port = 4040
namespace "nginx" {
source {
files = [".behave-sandbox/access.log"]
}
format = "$remote_addr - $remote_user [$time_local] \"$request\" $status $body_bytes_sent \"$http_referer\" \"$http_user_agent\" \"$http_x_forwarded_for\""
labels {
foo = "bar"
}
}
----------------------------------
Feature: Config file allows custom labels
Scenario: Labels are added
Given a running exporter listening with configuration file "test-configuration-labels.hcl"
When the following HTTP request is logged to "access.log"
"""
172.17.0.1 - - [23/Jun/2016:16:04:20 +0000] "GET / HTTP/1.1" 200 612 "-" "curl/7.29.0" "-"
"""
Then the exporter should report value 1 for metric nginx_http_response_count_total{foo="bar",method="GET",status="200"}
对这个所有匹配的指标名称加一个标签 foo = "bar"
http_upstream_connect_time_seconds 分位数是如何计算的
Feature: Upstream response times are summarized
Scenario: Single request is summarized
Given a running exporter listening on "access.log" with upstream-time format
When the following HTTP request is logged to "access.log"
"""
172.17.0.1 - - [23/Jun/2016:16:04:20 +0000] "GET / HTTP/1.1" 200 612 "-" "curl/7.29.0" "-" 10
"""
Then the exporter should report value 10 for metric nginx_http_upstream_time_seconds{method="GET",status="200",quantile="0.5"}
Scenario: .5 quantile of upstream time is computed
Given a running exporter listening on "access.log" with upstream-time format
When the following HTTP request is logged to "access.log"
"""
172.17.0.1 - - [23/Jun/2016:16:04:20 +0000] "GET / HTTP/1.1" 200 612 "-" "curl/7.29.0" "-" 10
172.17.0.1 - - [23/Jun/2016:16:04:20 +0000] "GET / HTTP/1.1" 200 612 "-" "curl/7.29.0" "-" 20
172.17.0.1 - - [23/Jun/2016:16:04:20 +0000] "GET / HTTP/1.1" 200 612 "-" "curl/7.29.0" "-" 30
172.17.0.1 - - [23/Jun/2016:16:04:20 +0000] "GET / HTTP/1.1" 200 612 "-" "curl/7.29.0" "-" 40
"""
Then the exporter should report value 20 for metric nginx_http_upstream_time_seconds{method="GET",status="200",quantile="0.5"}
Scenario: .5 quantile of upstream connect time is computed
Given a running exporter listening on "access.log" with upstream-connect-time format
When the following HTTP request is logged to "access.log"
"""
172.17.0.1 - - [23/Jun/2016:16:04:20 +0000] "GET / HTTP/1.1" 200 612 "-" "curl/7.29.0" "-" 10 5
172.17.0.1 - - [23/Jun/2016:16:04:20 +0000] "GET / HTTP/1.1" 200 612 "-" "curl/7.29.0" "-" 20 5
172.17.0.1 - - [23/Jun/2016:16:04:20 +0000] "GET / HTTP/1.1" 200 612 "-" "curl/7.29.0" "-" 30 10
172.17.0.1 - - [23/Jun/2016:16:04:20 +0000] "GET / HTTP/1.1" 200 612 "-" "curl/7.29.0" "-" 40 10
"""
Then the exporter should report value 5 for metric nginx_http_upstream_connect_time_seconds{method="GET",status="200",quantile="0.5"}
Scenario: .5 quantile of response time is computed
Given a running exporter listening on "access.log" with request-time format
When the following HTTP request is logged to "access.log"
"""
172.17.0.1 - - [23/Jun/2016:16:04:20 +0000] "GET / HTTP/1.1" 200 612 "-" "curl/7.29.0" "-" 10
172.17.0.1 - - [23/Jun/2016:16:04:20 +0000] "GET / HTTP/1.1" 200 612 "-" "curl/7.29.0" "-" 20
172.17.0.1 - - [23/Jun/2016:16:04:20 +0000] "GET / HTTP/1.1" 200 612 "-" "curl/7.29.0" "-" 30
172.17.0.1 - - [23/Jun/2016:16:04:20 +0000] "GET / HTTP/1.1" 200 612 "-" "curl/7.29.0" "-" 40
"""
Then the exporter should report value 20 for metric nginx_http_response_time_seconds{method="GET",status="200",quantile="0.5"}
172.17.0.1 - - [23/Jun/2016:16:04:20 +0000] "GET / HTTP/1.1" 200 612 "-" "curl/7.29.0" "-" 10 5
172.17.0.1 - - [23/Jun/2016:16:04:20 +0000] "GET / HTTP/1.1" 200 612 "-" "curl/7.29.0" "-" 20 5
172.17.0.1 - - [23/Jun/2016:16:04:20 +0000] "GET / HTTP/1.1" 200 612 "-" "curl/7.29.0" "-" 30 10
172.17.0.1 - - [23/Jun/2016:16:04:20 +0000] "GET / HTTP/1.1" 200 612 "-" "curl/7.29.0" "-" 40 10
"""
Then the exporter should report value 5 for metric nginx_http_upstream_connect_time_seconds{method="GET",status="200",quantile="0.5"}
举一个例子:如上,这个指标计算的是 50 分位数。分位数的计算方式是先把样本从小到大排序,50 分位取中间位置的那个值;这里 4 个样本排序后为 5、5、10、10,取第 2 个,所以 50 分位数是 5。
http_response_size_bytes 和http_request_size_bytes大小是怎么来的
enable_experimental: true
listen:
port: 4040
namespaces:
- name: test
format: "$remote_addr - $remote_user [$time_local] \"$request\" $status $body_bytes_sent \"$http_referer\" \"$http_user_agent\" $bytes_sent $request_length"
source_files:
- .behave-sandbox/access.log
------------
Feature: Message sizes are counted
Scenario: Response body sizes are counted
Given a running exporter listening with configuration file "test-config-message-sizes.yaml"
When the following HTTP request is logged to "access.log"
"""
$remote_addr - $remote_user [$time_local] \"$request\" $status $body_bytes_sent \"$http_referer\" \"$http_user_agent\" $process_time $bytes_sent $request_length
172.17.0.1 - - [23/Jun/2016:16:04:20 +0000] "GET / HTTP/1.1" 200 1000 "-" "curl/7.29.0" 300 400
172.17.0.1 - - [23/Jun/2016:16:04:20 +0000] "GET / HTTP/1.1" 200 2000 "-" "curl/7.29.0" 300 500
"""
Then the exporter should report value 3000 for metric test_http_response_size_bytes{method="GET",status="200"}
Scenario: Request body sizes are counted
Given a running exporter listening with configuration file "test-config-message-sizes.yaml"
When the following HTTP request is logged to "access.log"
"""
172.17.0.1 - - [23/Jun/2016:16:04:20 +0000] "GET / HTTP/1.1" 200 1000 "-" "curl/7.29.0" 300 400
172.17.0.1 - - [23/Jun/2016:16:04:20 +0000] "GET / HTTP/1.1" 200 2000 "-" "curl/7.29.0" 300 500
"""
Then the exporter should report value 900 for metric test_http_request_size_bytes{method="GET",status="200"}
http_response_size_bytes 是 nginx $body_bytes_sent 变量的累加值。
http_request_size_bytes 是 nginx $request_length 变量的累加值。
syslog日志监控示例
listen:
port: 4040
namespaces:
- name: nginx
source:
syslog:
listen_address: udp://0.0.0.0:1234
tags:
- ""
format: "$remote_addr - $remote_user [$time_local] \"$request\" $status $body_bytes_sent \"$http_referer\" \"$http_user_agent\" \"$http_x_forwarded_for\""
----------------
Feature: Can read log entries from syslog
Scenario: Read from syslog
Given a running exporter listening with configuration file "test-config-syslog.yaml"
When the following HTTP request is logged to syslog on port 1234
"""
172.17.0.1 - - [23/Jun/2016:16:04:20 +0000] "GET / HTTP/1.1" 200 612 "-" "curl/7.29.0" "-"
"""
Then the exporter should report value 1 for metric nginx_http_response_count_total{method="GET",status="200"}
Scenario: Read from syslog running on a UNIX socket
Given a running exporter listening with configuration file "test-config-syslog-unix.yaml"
When the following HTTP request is logged to syslog on socket /tmp/syslog.sock
"""
172.17.0.1 - - [23/Jun/2016:16:04:20 +0000] "GET / HTTP/1.1" 200 612 "-" "curl/7.29.0" "-"
"""
Then the exporter should report value 1 for metric nginx_http_response_count_total{method="GET",status="200"}
When the exporter is stopped
Then the socket /tmp/syslog.sock should not exist
动态重新标记
namespace "app1" {
// ...
relabel "request_uri" {
from = "request"
split = 2
separator = " " // (1)
// if enabled, only include label in response count metric (default is false)
only_counter = false
match "^/users/[0-9]+" {
replacement = "/users/:id"
}
match "^/profile" {
replacement = "/profile"
}
}
}
注 (1):separator 属性是可选的;如果省略,将默认使用空格字符(" ")作为分隔符。
如果找到匹配项,则用 replacement 替换原始值中每一处匹配到的内容;否则,处理将继续检查后面的 match 语句。
用于重新标记的 YAML 配置的工作方式类似于 HCL 配置:
namespaces:
- name: app1
relabel_configs:
- target_label: request_uri
from: request
split: 2
separator: ' '
matches:
- regexp: "^/users/[0-9]+"
replacement: "/users/:id"
如果正则表达式包含分组,还可以在 replacement 的值中引用这些分组的匹配结果:
relabel "request_uri" {
from = "request"
split = 2
match "^/(users|profiles)/[0-9]+" {
replacement = "/$1/:id"
}
}
动态重新标记:取 request_uri 中 ? 之前的部分作为标签的值
enable_experimental = true
listen {
port = 4040
}
namespace "nginx" {
source = {
files = [
"/var/log/nginx/access.log",
"/var/log/nginx/error.log",
]
}
format = "$remote_addr - $remote_user [$time_local] \"$request\" $status $body_bytes_sent \"$http_referer\" \"$http_user_agent\" \"$http_x_forwarded_for\" $upstream_response_time"
histogram_buckets = [.005, .01, .025, .05, .1, .25, .5, 1, 2.5, 5, 10]
relabel "request_uri" {
from = "request"
split = 2
match "^(/[^?]{0,})*(.*)$" {
replacement = "$1"
}
}
}
nginx_http_response_count_total_counter{method="GET",request_uri="/?asiuhwf%20hiuoeahuoifaw",status="500"} 1
nginx_http_response_count_total_counter{method="GET",request_uri="/asdadsdsdas?asiuhwf%20hiuoeahuoifaw",status="404"} 2
匹配结果是:
nginx_http_response_count_total_counter{method="GET",request_uri="/asdadsdsdas",status="404"} 3
动态重新标记:将完整的 request_uri 作为标签的值
namespaces:
- name: app1
relabel_configs:
- target_label: request_uri
from: request
split: 2
matches:
- regexp: "^(.*)$"
replacement: "$1"
这种方式会导致指标的时间序列数量随着 URL 种类的增长而越来越多,需谨慎使用。
根据 remote_addr 统计用户 IP 及其访问的 URL
listen:
port: 4040
address: "0.0.0.0"
consul:
enable: false
namespaces:
- name: my
relabel_configs:
- target_label: client_addr
from: remote_addr
- target_label: request_uri
from: request
split: 2
separator: ' '
matches:
- regexp: "^(\\/[^\\/\\?]+\\/[^\\/\\?]+)\\/.*$"
replacement: "$1"
format: "$remote_addr - $remote_user [$time_local] \"$request\" $status $body_bytes_sent \"$http_referer\" \"$http_user_agent\" \"$http_x_forwarded_for\" \"$request_length\" \"$upstream_response_time\" \"$request_time\" \"$upstream_cache_status\""
source:
files:
- /var/log/nginx/access.log
labels:
service: nginx-gray
histogram_buckets: [ .25, .5, 1, 2.5]
3. 添加到 prometheus 监控 instance
- job_name: "nginxlog"
static_configs:
- targets: ["127.0.0.1:4040"]
总结
prometheus-nginxlog-exporter 这个导出器实现的功能还是很多的,关键在于阅读官方文档;按照上面的方式配置,基本上想做什么都可以实现。