下载Prometheus二进制文件
wget https://github.com/prometheus/prometheus/releases/download/v2.47.2/prometheus-2.47.2.linux-amd64.tar.gz
解压缩,查看文件内容
tar -xf prometheus-2.47.2.linux-amd64.tar.gz -C /usr/local/
root@node1:/usr/local/prometheus-2.47.2.linux-amd64# tree
.
├── console_libraries
│ ├── menu.lib
│ └── prom.lib
├── consoles
│ ├── index.html.example
│ ├── node-cpu.html
│ ├── node-disk.html
│ ├── node.html
│ ├── node-overview.html
│ ├── prometheus.html
│ └── prometheus-overview.html
├── LICENSE
├── NOTICE
├── prometheus # prometheus二进制程序文件
├── prometheus.yml
└── promtool # prometheus配置文件检查工具
创建软链接
root@node1:~# ln -s /usr/local/prometheus-2.47.2.linux-amd64/ /usr/local/prometheus
创建Service文件
root@node1:/usr/local/prometheus# cat /etc/systemd/system/prometheus.service
[Unit]
Description=Prometheus
After=network.target
[Service]
WorkingDirectory=/usr/local/prometheus
ExecStart=/usr/local/prometheus/prometheus --config.file="/usr/local/prometheus/prometheus.yml"
[Install]
WantedBy=multi-user.target
启动服务
systemctl daemon-reload
systemctl start prometheus.service
systemctl enable prometheus.service
访问服务
Prometheus默认监听9090端口,可通过浏览器访问 http://<服务器IP>:9090
安装node exporter
在每个需要采集数据的节点上下载node exporter
wget https://github.com/prometheus/node_exporter/releases/download/v1.6.1/node_exporter-1.6.1.linux-amd64.tar.gz
解压
tar -xf node_exporter-1.6.1.linux-amd64.tar.gz -C /usr/local/
创建软链接
ln -s /usr/local/node_exporter-1.6.1.linux-amd64/ /usr/local/node_exporter
创建Service文件
tee /etc/systemd/system/node_exporter.service << EOF
[Unit]
Description=Node Exporter
After=network.target
[Service]
ExecStart=/usr/local/node_exporter/node_exporter
[Install]
WantedBy=multi-user.target
EOF
启动服务
systemctl daemon-reload
systemctl start node_exporter.service
systemctl enable node_exporter.service
查看服务
可以看到服务监听在9100端口
root@node4:/usr/local/node_exporter# ss -tunlp
Netid State Recv-Q Send-Q Local Address:Port Peer Address:Port Process
tcp LISTEN 0 4096 *:9100 *:* users:(("node_exporter",pid=177028,fd=3))
修改prometheus.yml配置文件,添加node exporter的地址
# my global config
global:
  scrape_interval: 15s # Set the scrape interval to every 15 seconds. Default is every 1 minute. 数据采集间隔时间,如果不配置默认为一分钟
  evaluation_interval: 15s # Evaluate rules every 15 seconds. The default is every 1 minute. 规则扫描间隔时间,如果不配置默认为一分钟
  # scrape_timeout is set to the global default (10s). 抓取数据的超时时间
# Alertmanager configuration
alerting:
  alertmanagers:
    - static_configs:
        - targets:
          # - alertmanager:9093
# Load rules once and periodically evaluate them according to the global 'evaluation_interval'.
rule_files:
  # - "first_rules.yml"
  # - "second_rules.yml"
# A scrape configuration containing exactly one endpoint to scrape:
# Here it's Prometheus itself.
scrape_configs:
  # The job name is added as a label `job=<job_name>` to any timeseries scraped from this config.
  - job_name: "prometheus"
    # metrics_path defaults to '/metrics' 默认抓取的url路径为/metrics
    # scheme defaults to 'http'.
    static_configs:
      - targets: ["localhost:9090"]
  - job_name: "node-exporter" # 定义一个job名称
    static_configs: # 使用静态配置
      # 需要采集数据的服务器列表
      - targets: ["192.168.202.151:9100","192.168.202.152:9100","192.168.202.153:9100","192.168.202.154:9100"]
检查prometheus的配置文件是否合法
root@node1:/usr/local/prometheus# ./promtool check config prometheus.yml
Checking prometheus.yml
SUCCESS: prometheus.yml is valid prometheus config file syntax
重启Prometheus
systemctl restart prometheus
部署blackbox exporter
blackbox_exporter是Prometheus官方提供的一个exporter,可以通过HTTP、HTTPS、DNS、TCP和ICMP协议对被监控目标进行探测和数据采集
HTTP/HTTPS: URL/API可用性检测
TCP: 端口监听检测
ICMP: 主机存活检测
DNS: 域名解析检测
下载二进制文件
wget https://github.com/prometheus/blackbox_exporter/releases/download/v0.24.0/blackbox_exporter-0.24.0.linux-amd64.tar.gz
解压并创建软链接
tar -xf blackbox_exporter-0.24.0.linux-amd64.tar.gz -C /usr/local/
ln -s /usr/local/blackbox_exporter-0.24.0.linux-amd64/ /usr/local/blackbox_exporter
创建Service文件
tee /etc/systemd/system/blackbox-exporter.service <<EOF
[Unit]
Description=Prometheus Blackbox Exporter
After=network.target
[Service]
Type=simple
User=root
Group=root
ExecStart=/usr/local/blackbox_exporter/blackbox_exporter \
--config.file=/usr/local/blackbox_exporter/blackbox.yml
Restart=on-failure
[Install]
WantedBy=multi-user.target
EOF
启动blackbox
systemctl daemon-reload
systemctl start blackbox-exporter.service
systemctl enable blackbox-exporter.service
查看服务
blackbox默认监听的端口为9115
root@node1:/usr/local/blackbox_exporter# ss -tunlp
Netid State Recv-Q Send-Q Local Address:Port Peer Address:Port Process
tcp LISTEN 0 4096 *:9115 *:* users:(("blackbox_export",pid=242127,fd=3))
tcp LISTEN 0 4096 *:9090 *:* users:(("prometheus",pid=190665,fd=7))
tcp LISTEN 0 4096 *:9100 *:* users:(("node_exporter",pid=180489,fd=3))
修改prometheus配置文件,添加blackbox监控的url
# my global config
global:
  scrape_interval: 15s # Set the scrape interval to every 15 seconds. Default is every 1 minute.
  evaluation_interval: 15s # Evaluate rules every 15 seconds. The default is every 1 minute.
  # scrape_timeout is set to the global default (10s).
# Alertmanager configuration
alerting:
  alertmanagers:
    - static_configs:
        - targets:
          # - alertmanager:9093
# Load rules once and periodically evaluate them according to the global 'evaluation_interval'.
rule_files:
  # - "first_rules.yml"
  # - "second_rules.yml"
# A scrape configuration containing exactly one endpoint to scrape:
# Here it's Prometheus itself.
scrape_configs:
  # The job name is added as a label `job=<job_name>` to any timeseries scraped from this config.
  - job_name: "prometheus"
    # metrics_path defaults to '/metrics'
    # scheme defaults to 'http'.
    static_configs:
      - targets: ["localhost:9090"]
  - job_name: "node-exporter"
    static_configs:
      - targets: ["192.168.202.151:9100","192.168.202.152:9100","192.168.202.153:9100","192.168.202.154:9100"]
  # monitor web site
  - job_name: "http_status"
    metrics_path: /probe
    params:
      module: [http_2xx]
    static_configs:
      - targets: ["http://www.bing.com","https://www.baidu.com"]
        labels:
          instance: http_status
          group: web
    relabel_configs:
      - source_labels: [__address__] # 将__address__(当前目标地址)的值写入__param_target标签,作为探测请求的target参数
        target_label: __param_target # 例如目标为http://www.bing.com时,__param_target的值即为该地址
      - source_labels: [__param_target] # 以__param_target(监控目标)作为来源
        target_label: url # 新建url标签,其值为__param_target的值
      - target_label: __address__
        replacement: 192.168.202.151:9115 # blackbox exporter的地址,Prometheus实际向该地址发起抓取
检查配置文件是否正确
root@node1:/usr/local/prometheus# ./promtool check config prometheus.yml
Checking prometheus.yml
SUCCESS: prometheus.yml is valid prometheus config file syntax
重启prometheus
systemctl restart prometheus.service