一、概述
Prometheus 最开始是由 SoundCloud 开发的开源监控告警系统,是 Google BorgMon 监控系统的开源版本。在 2016 年,Prometheus 加入 CNCF,成为继 Kubernetes 之后第二个被 CNCF 托管的项目。随着 Kubernetes 在容器编排领头羊地位的确立,Prometheus 也成为 Kubernetes 容器监控的标配。
二、使用 Helm 安装 Prometheus
地址:https://artifacthub.io/packages/helm/prometheus-community/prometheus
1.配置helm源
helm repo add prometheus-community https://prometheus-community.github.io/helm-charts
helm repo update
helm search repo prometheus-community/prometheus
2.下载并解压 chart(先执行 helm pull prometheus-community/prometheus --version 22.6.6 得到 prometheus-22.6.6.tgz)
tar -xvf prometheus-22.6.6.tgz
[root@k8s-master prometheus]# ls
Chart.lock charts Chart.yaml README.md templates values.schema.json values.yaml
3.制作prometheus镜像
多个环境中经常出现 Prometheus 版本不一致的情况,上线时容易造成混乱,因此这里自行制作统一的基础镜像(镜像中集成了 dockerize 工具,用于在容器启动时渲染配置模板)。
vim Dockerfile
# Base image: Alpine + dockerize + the Prometheus release binaries.
#
# Optional build arguments:
#   ALPINE_VERSION      Alpine tag to build on (default: latest). Pin it to a
#                       fixed release for reproducible builds.
#   PROMETHEUS_VERSION  Prometheus release to download, without the leading
#                       "v" (default: 2.45.0).
ARG ALPINE_VERSION=latest
FROM alpine:${ALPINE_VERSION}

ARG PROMETHEUS_VERSION=2.45.0
# key=value form; the space-separated "ENV key value" form is legacy syntax.
ENV DOCKERIZE_VERSION=v0.7.0

RUN apk add --no-cache openssl curl

# Install dockerize into /usr/local/bin.
# -f makes curl fail on an HTTP error instead of saving the error page.
RUN curl -fsSLO "https://github.com/jwilder/dockerize/releases/download/${DOCKERIZE_VERSION}/dockerize-alpine-linux-amd64-${DOCKERIZE_VERSION}.tar.gz" \
    && tar -C /usr/local/bin -xzf "dockerize-alpine-linux-amd64-${DOCKERIZE_VERSION}.tar.gz" \
    && rm "dockerize-alpine-linux-amd64-${DOCKERIZE_VERSION}.tar.gz"

# Unpack the Prometheus release to /prometheus and create the config and
# rules directories inside it.
RUN curl -fsSLO "https://github.com/prometheus/prometheus/releases/download/v${PROMETHEUS_VERSION}/prometheus-${PROMETHEUS_VERSION}.linux-amd64.tar.gz" \
    && tar -xf "prometheus-${PROMETHEUS_VERSION}.linux-amd64.tar.gz" -C / \
    && mv "/prometheus-${PROMETHEUS_VERSION}.linux-amd64" /prometheus \
    && mkdir -p /prometheus/config /prometheus/rules \
    && rm "prometheus-${PROMETHEUS_VERSION}.linux-amd64.tar.gz"

WORKDIR /prometheus
ENTRYPOINT ["/prometheus/prometheus"]
[root@k8s-master alpine]# docker build -t prometheus:alpine-base-2.45.1 . (作为基础镜像)
[root@k8s-master prometheus]# ls
conf Dockerfile entrypoint.sh README.md rules
[root@k8s-master prometheus]# cat Dockerfile
# Environment-aware Prometheus image layered on the locally built base image.
#
# Build argument:
#   VERISON  tag of the local "prometheus" base image
#            (default: alpine-base-2.45.1). The spelling is kept as-is because
#            it is the name callers pass via --build-arg.
ARG VERISON
FROM prometheus:${VERISON:-alpine-base-2.45.1}
LABEL changelog="monitor prometheus"
# MAINTAINER is deprecated; a "maintainer" label carries the same information.
LABEL maintainer="xxxx@163.com"
COPY conf /prometheus/conf
COPY rules /prometheus/rules
COPY entrypoint.sh /prometheus/entrypoint.sh
# The exec-form ENTRYPOINT runs the script directly, so it must be executable
# regardless of the file mode in the build context.
RUN chmod +x /prometheus/entrypoint.sh
ENTRYPOINT ["/prometheus/entrypoint.sh"]
[root@k8s-master prometheus]# cat entrypoint.sh
#!/bin/sh
# Container entrypoint: render the dockerize templates, then start Prometheus.
#
# Environment:
#   CONFIG_FILE  File name of the configuration to use. When set, the template
#                /prometheus/conf/template/$CONFIG_FILE is rendered to
#                /prometheus/conf/$CONFIG_FILE, which Prometheus then loads.
#                NOTE(review): the sample template in this guide is named
#                prometheus.yml.template - confirm whether CONFIG_FILE is
#                expected to include the ".template" suffix.
set -eu

# Render the alert rules. dockerize expects "-template <src>:<dest>"; with a
# space instead of the colon the destination is treated as a command to run.
dockerize -template /prometheus/rules/node.rules.template:/prometheus/rules/node.rules.yml

# Render the environment-specific configuration file, if one was requested.
if [ -n "${CONFIG_FILE:-}" ]; then
  dockerize -template "/prometheus/conf/template/${CONFIG_FILE}:/prometheus/conf/${CONFIG_FILE}"
fi

# Remove any lock left in the data directory before starting (presumably a
# stale lock from a previous unclean shutdown).
rm -rf /prometheus/data/lock

# Replace the shell with Prometheus so it receives signals directly.
# The config path points at /prometheus/conf, where the file was rendered above.
exec /prometheus/prometheus \
  "--config.file=/prometheus/conf/${CONFIG_FILE:-}" \
  --storage.tsdb.path=/prometheus/data \
  --web.console.libraries=/prometheus/console_libraries \
  --web.console.templates=/prometheus/consoles \
  --web.enable-lifecycle \
  --web.enable-admin-api
dockerize 会在容器启动时将 .template 文件渲染成对应的配置文件,例如 prometheus.yml.template --> prometheus.yml。这样同一个镜像只需通过环境变量(如 SVC_ALERTMANEGER)传入不同的值,就可以用于多个环境。
[root@k8s-master prometheus]# cat conf/template/prometheus.yml.template
# Prometheus configuration template, rendered by dockerize at container start.
# my global config
global:
  scrape_interval: 15s # Set the scrape interval to every 15 seconds. Default is every 1 minute.
  evaluation_interval: 15s # Evaluate rules every 15 seconds. The default is every 1 minute.
  # scrape_timeout is set to the global default (10s).

# Alertmanager configuration
# Targets come from the comma-separated SVC_ALERTMANEGER environment variable,
# falling back to the default below when it is unset.
# NOTE(review): the variable name and the default host "alertmanger" look
# misspelled; they are kept as-is because they may match existing deployments.
alerting:
  alertmanagers:
    - static_configs:
        - targets:
          {{- $svc_alertmangers := default .Env.SVC_ALERTMANEGER "alertmanger:9093" -}}
          {{- $svc_alertmangers_list := split $svc_alertmangers "," -}}
          {{ range $svc_alertmangers_list }}
            - {{ . }}{{ end }}

# Load rules once and periodically evaluate them according to the global 'evaluation_interval'.
rule_files:
  - "/prometheus/rules/*.yml"
  # - "second_rules.yml"

# A scrape configuration containing exactly one endpoint to scrape:
# Here it's Prometheus itself.
scrape_configs:
  # The job name is added as a label `job=<job_name>` to any timeseries scraped from this config.
  - job_name: "prometheus"
    # metrics_path defaults to '/metrics'
    # scheme defaults to 'http'.
    static_configs:
      - targets: ["localhost:9090"]