Prometheus 监控外部 Redis、MySQL、ClickHouse(二)

254 阅读3分钟

在前面的部署完成后才可以执行后面的操作,如果还没有部署,请先完成这一篇:Prometheus 详细部署(包含 Alertmanager、Grafana)

部署 redis-exporter


# Deployment that runs a single redis-exporter pod in the bigdata namespace.
# The exporter connects to the external Redis given by REDIS_ADDR.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: redis-exporter
  namespace: bigdata
  labels:
    app: redis-exporter
spec:
  selector:
    matchLabels:
      app: redis-exporter
  replicas: 1
  strategy:
    rollingUpdate:
      maxSurge: 25%
      maxUnavailable: 25%
    type: RollingUpdate
  template:
    metadata:
      labels:
        app: redis-exporter
    spec:
      containers:
        - name: redis-exporter
          image: harbor.lll.com/base/redis-exporter:latest
          resources:
            requests:
              cpu: 100m
              memory: 100Mi
            limits:
              cpu: 100m
              memory: 100Mi
          env:
            # Env values must be strings: an unquoted 123456 is parsed as an
            # integer and the manifest is rejected, so it is quoted here.
            # NOTE(review): consider sourcing the password from a Secret
            # instead of committing it in plain text.
            - name: REDIS_PASSWORD
              value: "123456"
            - name: REDIS_ADDR
              value: redis://192.168.211.21:6379
          ports:
            # Must match the redis-exporter Service, which publishes port 9121
            # without a targetPort and therefore forwards to pod port 9121.
            - containerPort: 9121
              name: redis-exporter
          volumeMounts:
            # Use the host's Asia/Shanghai zone file as the container's
            # local time.
            - name: localtime
              mountPath: /etc/localtime
      volumes:
        - name: localtime
          hostPath:
            path: /usr/share/zoneinfo/Asia/Shanghai
      restartPolicy: Always
---
# Service that exposes the redis-exporter pods inside the cluster on TCP 9121.
kind: Service
apiVersion: v1
metadata:
  namespace: bigdata
  name: redis-exporter
  labels:
    app: redis-exporter
spec:
  ports:
    # No targetPort is given, so traffic is forwarded to pod port 9121.
    - port: 9121
      protocol: TCP
      name: redis-exporter
  # Routes to pods labelled app=redis-exporter.
  selector:
    app: redis-exporter

[]

部署 mysql-exporter

# Deployment that runs a single mysqld-exporter pod in the bigdata namespace.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: mysql-exporter
  namespace: bigdata
spec:
  replicas: 1
  selector:
    matchLabels:
      k8s-app: mysql-exporter
  template:
    metadata:
      labels:
        k8s-app: mysql-exporter
    spec:
      containers:
        - name: mysql-exporter
          # Alternative public image:
          # registry.cn-chengdu.aliyuncs.com/qzcsbj/mysqld-exporter:v0.12.1
          # Tag corrected from the misspelled "lastest".
          image: harbor.lll.com/prom/mysqld-exporter:latest
          imagePullPolicy: IfNotPresent
          env:
            # DSN format is user:password@(host:port)/ ; "root:root" below is
            # the user name and the password.
            # NOTE(review): consider sourcing the credentials from a Secret.
            - name: DATA_SOURCE_NAME
              value: "root:root@(192.168.1.1:3306)/"
          ports:
            - containerPort: 9104
              name: http
---
# ClusterIP Service that exposes the mysql-exporter pods on TCP 9104.
kind: Service
apiVersion: v1
metadata:
  namespace: bigdata
  name: mysql-exporter
  labels:
    k8s-app: mysql-exporter
spec:
  type: ClusterIP
  ports:
    # No targetPort is given, so traffic is forwarded to pod port 9104.
    - port: 9104
      protocol: TCP
      name: mysql-exporter-api
  # Routes to pods labelled k8s-app=mysql-exporter.
  selector:
    k8s-app: mysql-exporter

部署 clickhouse-exporter

# Deployment that runs a single clickhouse-exporter pod in the bigdata
# namespace.
kind: Deployment
apiVersion: apps/v1
metadata:
  name: clickhouse-exporter
  namespace: bigdata
  labels:
    k8s-app: clickhouse-exporter
spec:
  replicas: 1
  selector:
    matchLabels:
      k8s-app: clickhouse-exporter
  template:
    metadata:
      labels:
        k8s-app: clickhouse-exporter
    spec:
      containers:
        - name: clickhouse-exporter
          image: f1yegor/clickhouse-exporter:latest
          imagePullPolicy: IfNotPresent
          args:
            # Address of the ClickHouse instance to scrape.
            - -scrape_uri=http://192.168.1.1:8123/
          # Uncomment to supply credentials for ClickHouse:
          # env:
          #   - name: CLICKHOUSE_USER
          #     value: "root"          # ClickHouse account
          #   - name: CLICKHOUSE_PASSWORD
          #     value: "xxxxxxxx"      # ClickHouse password
          ports:
            # This port name is needed when configuring the scrape job.
            - name: metric-port
              containerPort: 9116
---
# ClusterIP Service that exposes the clickhouse-exporter pods on TCP 9116.
kind: Service
apiVersion: v1
metadata:
  name: clickhouse-exporter
  namespace: bigdata
  labels:
    app: clickhouse-exporter
spec:
  type: ClusterIP
  # Routes to pods labelled k8s-app=clickhouse-exporter.
  selector:
    k8s-app: clickhouse-exporter
  ports:
    - name: clickhouse-exporter
      protocol: TCP
      port: 9116
      targetPort: 9116

部署完成后,开始编写 Prometheus 的抓取配置(scrape_configs)

# ConfigMap holding prometheus.yml: scrape jobs for Prometheus itself and the
# Redis, MySQL, ClickHouse and Pushgateway exporters, plus the rule-file glob
# and the Alertmanager target.
apiVersion: v1
kind: ConfigMap
metadata:
  name: prometheus-config
  namespace: bigdata
data:
  prometheus.yml: |
    global:
      scrape_interval: 15s
      evaluation_interval: 30s
    scrape_configs:
      - job_name: "prometheus"
        static_configs:
          - targets: ["localhost:9090"]
      # Metrics from the redis-exporter pod. The host name must match the
      # redis-exporter Service in the bigdata namespace.
      - job_name: 'redis_exporter'
        static_configs:
          - targets: ["redis-exporter:9121"]
      # Multi-target scraping: each Redis address below is passed to the
      # exporter's /scrape endpoint as the "target" URL parameter.
      - job_name: 'redis_exporter_targets'
        static_configs:
          - targets:
              - redis://192.168.1.1:7001
              - redis://192.168.1.1:7002
              - redis://192.168.1.2:7001
              - redis://192.168.1.2:7002
              - redis://192.168.1.3:7001
              - redis://192.168.1.3:7002
        metrics_path: /scrape
        relabel_configs:
          - source_labels: [__address__]
            target_label: __param_target
          - source_labels: [__param_target]
            target_label: instance
          - target_label: __address__
            # The redis-exporter Service host:port
            replacement: redis-exporter:9121
      # To get metrics about the mysql exporter's targets.
      # NOTE(review): no metrics_path is set, so the default /metrics endpoint
      # is scraped; confirm whether the exporter's multi-target endpoint was
      # intended here.
      - job_name: mysql_exporter
        params:
          # Not required. Will match value to child in config file. Default value is `client`.
          auth_module: [client.servers]
        static_configs:
          - targets:
              # All mysql hostnames or unix sockets to monitor.
              - 192.168.12.37:3306
              - 192.168.12.36:3306
              # - unix:///run/mysqld/mysqld.sock
        relabel_configs:
          - source_labels: [__address__]
            target_label: __param_target
          - source_labels: [__param_target]
            target_label: instance
          - target_label: __address__
            # The mysqld_exporter host:port
            replacement: mysql-exporter:9104
      # NOTE(review): every target below is rewritten to the single
      # clickhouse-exporter Service; confirm the exporter actually honours
      # the "target" URL parameter, otherwise all four series are identical.
      - job_name: 'clickhouse_exporter'
        scrape_interval: 10s
        static_configs:
          - targets:
              # - 192.168.1.48:8123
              - 192.168.1.31:9116
              - 192.168.1.32:9116
              - 192.168.1.33:9116
              - 192.168.1.34:9116
        relabel_configs:
          - source_labels: [__address__]
            target_label: __param_target
          - source_labels: [__param_target]
            target_label: instance
          - target_label: __address__
            # The clickhouse-exporter Service host:port
            replacement: clickhouse-exporter:9116
      # NOTE(review): every target below is rewritten to
      # prometheus-pushgateway:9091; confirm this relabeling is intended.
      - job_name: 'Prometheus-PushGateway'
        scrape_interval: 10s
        static_configs:
          - targets:
              - 192.168.1.24:8081
              - 192.168.1.25:9091
              - 192.168.1.26:9091
              - 192.168.1.27:9091
              - 192.168.1.28:9091
        relabel_configs:
          - source_labels: [__address__]
            target_label: __param_target
          - source_labels: [__param_target]
            target_label: instance
          - target_label: __address__
            # The Pushgateway host:port
            replacement: prometheus-pushgateway:9091
    rule_files:
      - /etc/prometheus/rules/*.yaml
    alerting:
      alertmanagers:
        - static_configs:
            - targets: ["alertmanager-svc:9093"]

先测试Redis服务

查看P8S是否加入成功

image.png

在 Grafana 中导入 763 模板

image-20240205215606906.png

将 rule 写入 Prometheus 里面

image-20240205220459036

image-20240205220514954 然后重启 Prometheus pod,在 Prometheus 界面查看 Alerts

image-20240205224130165

rule 已经 加入进来

停止redis服务

image-20240205224405064

等待检测

image-20240205224433566

到 Alertmanager 查看详细信息

image-20240205224518806

# ConfigMap holding the Prometheus alerting rule files for Redis, MySQL and
# ClickHouse. Each data key is one rule file; the key names end in .yaml.
apiVersion: v1
kind: ConfigMap
metadata:
  name: prometheus-rule
  labels:
    name: prometheus-rule
  namespace: bigdata
data:
  redis-rule.yaml: |-
    groups:
      - name: Redis
        rules:
          # Fires as soon as the expression below is true (for: 0m).
          - alert: RedisDisconnectedSlaves
            expr: count without (instance, job) (redis_connected_slaves) - sum without (instance, job) (redis_connected_slaves) - 1 > 1
            for: 0m
            labels:
              # Label values are strings; quoted so they are not read as numbers.
              severity: "1"
              error_code: redis_slaves_miss_3
              server_tag: redis
            annotations:
              summary: Redis disconnected slaves (instance {{ $labels.instance }})
              description: "Redis not replicating for all slaves. Consider reviewing the redis replication status.\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
  mysql-rule.yaml: |-
    groups:
      - name: MySQLStatsAlert
        rules:
          # Fires when mysql_up has been 0 for one minute.
          - alert: "MySQL is down"
            expr: mysql_up == 0
            for: 1m
            labels:
              severity: "1"
              server_tag: mysql
              error_code: mysql_noup_down
            annotations:
              summary: "Instance {{ $labels.instance }} MySQL is down"
              description: "MySQL database is down. This requires immediate action!"
  clickhouse-rule.yaml: |-
    groups:
      - name: qps_too_high
        rules:
          # Fires when the per-second query rate over 1m stays above 100
          # for one minute.
          - alert: "clickhouse qps超出阈值"
            expr: rate(clickhouse_query_total[1m]) > 100
            for: 1m
            labels:
              severity: "1"
              server_tag: clickhouse
              error_code: "clickhouse qps超出阈值"
            annotations:
              summary: "clickhouse qps超出阈值"
              description: "clickhouse qps超过阈值(100), qps: {{ $value }}"

重启 Prometheus pod 即可生效