以下操作需要在前面的部署完成之后才能执行；如果尚未部署，请先完成这一步：Prometheus 详细部署（包含 Alertmanager 和 Grafana）
部署 redis-exporter
---
# Deployment: a single redis-exporter pod in the "bigdata" namespace.
# The exporter is pointed at Redis through the REDIS_ADDR / REDIS_PASSWORD
# environment variables defined on the container below.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: redis-exporter
  namespace: bigdata
  labels:
    app: redis-exporter
spec:
  replicas: 1
  selector:
    matchLabels:
      app: redis-exporter
  strategy:
    type: RollingUpdate
    rollingUpdate:
      maxSurge: 25%
      maxUnavailable: 25%
  template:
    metadata:
      labels:
        app: redis-exporter
    spec:
      restartPolicy: Always
      containers:
        - name: redis-exporter
          image: harbor.lll.com/base/redis-exporter:latest
          # Requests equal limits for both CPU and memory.
          resources:
            requests:
              cpu: 100m
              memory: 100Mi
            limits:
              cpu: 100m
              memory: 100Mi
          env:
            # NOTE(review): the Redis password is stored in plain text here;
            # consider sourcing it from a Secret via valueFrom.secretKeyRef.
            - name: REDIS_PASSWORD
              # Quoted: env values must be strings, and a bare 123456 is
              # parsed as an integer by YAML.
              value: "123456"
            - name: REDIS_ADDR
              value: redis://192.168.211.21:6379
          ports:
            # Must agree with the port used by the redis-exporter Service and
            # by the Prometheus scrape jobs (9121).
            - name: redis-exporter
              containerPort: 9121
          volumeMounts:
            # Expose the host's Asia/Shanghai zoneinfo file as /etc/localtime.
            - name: localtime
              mountPath: /etc/localtime
      volumes:
        - name: localtime
          hostPath:
            path: /usr/share/zoneinfo/Asia/Shanghai
---
# Service in front of the redis-exporter pods (selected by app=redis-exporter).
# No targetPort is given, so traffic is forwarded to the same port number.
apiVersion: v1
kind: Service
metadata:
  namespace: bigdata
  name: redis-exporter
  labels:
    app: redis-exporter
spec:
  ports:
    - port: 9121
      protocol: TCP
      name: redis-exporter
  selector:
    app: redis-exporter
[]
部署 mysql-exporter
---
# Deployment: a single mysqld-exporter pod in the "bigdata" namespace.
# The MySQL connection is supplied through the DATA_SOURCE_NAME variable.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: mysql-exporter
  namespace: bigdata
spec:
  replicas: 1
  selector:
    matchLabels:
      k8s-app: mysql-exporter
  template:
    metadata:
      labels:
        k8s-app: mysql-exporter
    spec:
      containers:
        - name: mysql-exporter
          # Alternative image:
          # image: registry.cn-chengdu.aliyuncs.com/qzcsbj/mysqld-exporter:v0.12.1
          # Tag corrected from the misspelled "lastest".
          # NOTE(review): confirm the registry actually publishes ":latest".
          image: harbor.lll.com/prom/mysqld-exporter:latest
          imagePullPolicy: IfNotPresent
          env:
            - name: DATA_SOURCE_NAME
              # "root:root" is the MySQL username and password.
              # NOTE(review): credentials are in plain text; consider a Secret.
              value: "root:root@(192.168.1.1:3306)/"
          ports:
            - name: http
              containerPort: 9104
---
# ClusterIP Service in front of the mysql-exporter pods
# (selected by k8s-app=mysql-exporter), exposing TCP port 9104.
apiVersion: v1
kind: Service
metadata:
  namespace: bigdata
  name: mysql-exporter
  labels:
    k8s-app: mysql-exporter
spec:
  type: ClusterIP
  ports:
    - port: 9104
      protocol: TCP
      name: mysql-exporter-api
  selector:
    k8s-app: mysql-exporter
部署 clickhouse-exporter
---
# Deployment: a single clickhouse-exporter pod in the "bigdata" namespace.
# The ClickHouse endpoint to scrape is passed as a command-line argument.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: clickhouse-exporter
  namespace: bigdata
  labels:
    k8s-app: clickhouse-exporter
spec:
  replicas: 1
  selector:
    matchLabels:
      k8s-app: clickhouse-exporter
  template:
    metadata:
      labels:
        k8s-app: clickhouse-exporter
    spec:
      containers:
        - name: clickhouse-exporter
          image: f1yegor/clickhouse-exporter:latest
          imagePullPolicy: IfNotPresent
          args:
            # Address of the ClickHouse instance to scrape.
            - "-scrape_uri=http://192.168.1.1:8123/"
          # Optional credentials (disabled):
          # env:
          #   - name: CLICKHOUSE_USER
          #     value: "root"       # ClickHouse account
          #   - name: CLICKHOUSE_PASSWORD
          #     value: "xxxxxxxx"   # ClickHouse password
          ports:
            # This port name is needed when configuring the scrape job.
            - name: metric-port
              containerPort: 9116
---
# ClusterIP Service in front of the clickhouse-exporter pods
# (selected by k8s-app=clickhouse-exporter), mapping port 9116 -> 9116.
apiVersion: v1
kind: Service
metadata:
  name: clickhouse-exporter
  namespace: bigdata
  labels:
    app: clickhouse-exporter
spec:
  type: ClusterIP
  selector:
    k8s-app: clickhouse-exporter
  ports:
    - name: clickhouse-exporter
      protocol: TCP
      # port: 9104
      port: 9116
      targetPort: 9116
部署完成后，开始编写 Prometheus 配置（抓取任务与告警规则）
---
# ConfigMap carrying prometheus.yml: global intervals, the scrape jobs for
# Prometheus itself and the redis / mysql / clickhouse / pushgateway
# endpoints, the rule-file glob and the Alertmanager address.
apiVersion: v1
kind: ConfigMap
metadata:
  name: prometheus-config
  namespace: bigdata
data:
  prometheus.yml: |
    global:
      scrape_interval: 15s
      evaluation_interval: 30s

    scrape_configs:
      - job_name: "prometheus"
        static_configs:
          - targets: ["localhost:9090"]

      # Metrics of the exporter itself, reached through the redis-exporter
      # Service in this namespace (port 9121).
      - job_name: 'redis_exporter'
        static_configs:
          - targets: ["redis-exporter:9121"]

      # Multi-target job: each Redis URL becomes the ?target= parameter of a
      # /scrape request that is sent to the redis-exporter Service.
      - job_name: 'redis_exporter_targets'
        metrics_path: /scrape
        static_configs:
          - targets:
              - redis://192.168.1.1:7001
              - redis://192.168.1.1:7002
              - redis://192.168.1.2:7001
              - redis://192.168.1.2:7002
              - redis://192.168.1.3:7001
              - redis://192.168.1.3:7002
        relabel_configs:
          - source_labels: [__address__]
            target_label: __param_target
          - source_labels: [__param_target]
            target_label: instance
          - target_label: __address__
            # The redis-exporter Service host:port
            replacement: redis-exporter:9121

      # To get metrics about the mysql exporter's targets.
      # NOTE(review): targets are passed as ?target= parameters but the
      # default metrics path is kept; mysqld_exporter's multi-target mode is
      # served on /probe - confirm which endpoint is intended.
      - job_name: mysql_exporter
        params:
          # Not required. Will match value to child in config file.
          # Default value is `client`.
          auth_module: [client.servers]
        static_configs:
          - targets:
              # All mysql hostnames or unix sockets to monitor.
              - 192.168.12.37:3306
              - 192.168.12.36:3306
              # - unix:///run/mysqld/mysqld.sock
        relabel_configs:
          - source_labels: [__address__]
            target_label: __param_target
          - source_labels: [__param_target]
            target_label: instance
          - target_label: __address__
            # The mysqld_exporter host:port
            replacement: mysql-exporter:9104

      # NOTE(review): every target below is rewritten to the single address
      # clickhouse-exporter:9116, so the listed hosts differ only by their
      # instance label - confirm the exporter honours the target parameter.
      - job_name: 'clickhouse_exporter'
        scrape_interval: 10s
        static_configs:
          - targets:
              # - 192.168.1.48:8123
              - 192.168.1.31:9116
              - 192.168.1.32:9116
              - 192.168.1.33:9116
              - 192.168.1.34:9116
        relabel_configs:
          - source_labels: [__address__]
            target_label: __param_target
          - source_labels: [__param_target]
            target_label: instance
          - target_label: __address__
            # The clickhouse-exporter Service host:port
            replacement: clickhouse-exporter:9116

      # NOTE(review): every target below is rewritten to the single address
      # prometheus-pushgateway:9091, so the listed hosts differ only by their
      # instance label - confirm this is the intended setup.
      - job_name: 'Prometheus-PushGateway'
        scrape_interval: 10s
        static_configs:
          - targets:
              - 192.168.1.24:8081
              - 192.168.1.25:9091
              - 192.168.1.26:9091
              - 192.168.1.27:9091
              - 192.168.1.28:9091
        relabel_configs:
          - source_labels: [__address__]
            target_label: __param_target
          - source_labels: [__param_target]
            target_label: instance
          - target_label: __address__
            # The Pushgateway host:port
            replacement: prometheus-pushgateway:9091

    rule_files:
      - /etc/prometheus/rules/*.yaml

    alerting:
      alertmanagers:
        - static_configs:
            - targets: ["alertmanager-svc:9093"]
先测试 Redis 服务
在 Prometheus 中查看 target 是否加入成功
在 Grafana 中导入 763 号模板
将 rule 写入 Prometheus 的规则配置
然后重启 Prometheus Pod，在 Prometheus 界面查看 Alerts
可以看到 rule 已经加入进来
停止 Redis 服务
等待检测
到 Alertmanager 查看告警详细信息
---
# ConfigMap carrying three Prometheus alerting-rule files
# (redis-rule.yaml, mysql-rule.yaml, clickhouse-rule.yaml).
# Numeric label values are quoted so they stay strings under any YAML loader.
apiVersion: v1
kind: ConfigMap
metadata:
  name: prometheus-rule
  namespace: bigdata
  labels:
    name: prometheus-rule
data:
  redis-rule.yaml: |-
    groups:
      - name: Redis
        rules:
          # Fires immediately (for: 0m) when the expression below exceeds 1.
          - alert: RedisDisconnectedSlaves
            expr: count without (instance, job) (redis_connected_slaves) - sum without (instance, job) (redis_connected_slaves) - 1 > 1
            for: 0m
            labels:
              severity: "1"
              error_code: redis_slaves_miss_3
              server_tag: redis
            annotations:
              summary: "Redis disconnected slaves (instance {{ $labels.instance }})"
              description: "Redis not replicating for all slaves. Consider reviewing the redis replication status.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
  mysql-rule.yaml: |-
    groups:
      - name: MySQLStatsAlert
        rules:
          # Fires when mysql_up has been 0 for one minute.
          - alert: MySQL is down
            expr: mysql_up == 0
            for: 1m
            labels:
              severity: "1"
              server_tag: mysql
              error_code: mysql_noup_down
            annotations:
              summary: "Instance {{ $labels.instance }} MySQL is down"
              description: "MySQL database is down. This requires immediate action!"
  clickhouse-rule.yaml: |-
    groups:
      - name: qps_too_high
        rules:
          # Fires when the per-second query rate over the last minute has
          # stayed above 100 for one minute.
          - alert: clickhouse qps超出阈值
            expr: rate(clickhouse_query_total[1m]) > 100
            for: 1m
            labels:
              severity: "1"
              server_tag: clickhouse
              error_code: clickhouse qps超出阈值
            annotations:
              summary: "clickhouse qps超出阈值"
              description: "clickhouse qps超过阈值(100), qps: {{ $value }}"