prometheus-exporter

由于 Prometheus exporter 太多了，所以单独开了个文档

pg监控

postgres_exporter

单个数据库

postgres_exporter.sh
#!/bin/bash

#######################################
# Start (or replace) a postgres_exporter container for one PostgreSQL server.
# Any existing container with the same name is removed first, so the function
# can be re-run to apply new settings.
# Arguments:
#   $1 - PostgreSQL host
#   $2 - PostgreSQL port
#   $3 - PostgreSQL user
#   $4 - PostgreSQL password (may be empty, but must still be passed as "")
#   $5 - host port on which the exporter (container port 9187) is published
# Returns:
#   2 if the argument count is wrong, otherwise the status of `docker run`.
#######################################
start_pg_exporter() {
  # An unquoted empty password at the call site silently drops an argument;
  # refuse to start a mis-wired container in that case.
  if [[ $# -ne 5 ]]; then
    printf 'usage: start_pg_exporter HOST PORT USER PASSWORD EXPORTER_PORT\n' >&2
    return 2
  fi

  local pg_host=$1
  local pg_port=$2
  local pg_user=$3
  local pg_password=$4
  local exporter_port=$5
  local container="postgresql-exporter-${exporter_port}"

  # Remove a previous instance; a missing container is not an error.
  docker rm -f "${container}" >/dev/null 2>&1 || true

  # NOTE: user and password are interpolated into the URL as-is; characters
  # such as '@', ':' or '/' must be percent-encoded by the caller.
  docker run -d \
    --restart=always \
    -p "${exporter_port}:9187" \
    --name "${container}" \
    -v /etc/localtime:/etc/localtime:ro \
    -v /etc/timezone:/etc/timezone:ro \
    -e DATA_SOURCE_NAME="postgresql://${pg_user}:${pg_password}@${pg_host}:${pg_port}/postgres?sslmode=disable" \
    wrouesnel/postgres_exporter
}

# Settings for the PostgreSQL instance to monitor.
pgid="hdy-nmg-server-001"  # instance identifier; not referenced by this script
pg_host="10.0.18.2"
pg_port="5432"
pg_user="postgres"
pg_password=""
exporter_port=13101

# Every argument is quoted: pg_password is empty by default, and an unquoted
# empty expansion would disappear and shift exporter_port into the password
# position, leaving the exporter port empty.
start_pg_exporter "${pg_host}" "${pg_port}" "${pg_user}" "${pg_password}" "${exporter_port}"
Prometheus配置增加
  # hdy-nmg-server-001
  - job_name: 'hdy-nmg-server-001'
    static_configs:
      - targets: ['127.0.0.1:13101']
        labels:
          type: postgresql
          instance: hdy-nmg-server-001

多个数据库

postgres_exporter_setup.sh
#!/bin/bash
# Write the multi-target postgres_exporter auth configuration and (re)start
# the exporter container on the host network, listening on port 13101.
set -euo pipefail

mkdir -p config/

# Quoted delimiter: the YAML is written literally, so a '$' or backtick inside
# a password is not expanded by the shell.
cat << 'EOF' > config/postgres_exporter.yml
auth_modules:
  hdy-nmg-server-001:
    type: userpass
    userpass:
      username: postgres
      password: NoKgWU
    options:
      sslmode: disable
  aliyun-hz-server-master:
    type: userpass
    userpass:
      username: postgres
      password: 9VczV
    options:
      sslmode: disable
EOF

# Remove any previous container; a missing container is not an error.
docker rm -f postgresql-exporter >/dev/null 2>&1 || true

# "$(pwd)" is quoted so a working directory containing spaces still mounts.
docker run -d --net=host \
  --name postgresql-exporter \
  -v /etc/timezone:/etc/timezone:ro \
  -v /etc/localtime:/etc/localtime:ro \
  --restart=always \
  -v "$(pwd)/config/:/etc/postgres-exporter/" \
  quay.io/prometheuscommunity/postgres-exporter \
  --config.file /etc/postgres-exporter/postgres_exporter.yml \
  --web.listen-address=:13101
Prometheus配置增加
  # Multi-target scrape job: targets are read from file_sd files, but every
  # scrape is actually sent to the exporter's /probe endpoint.
  - job_name: 'postgresql-exporter'
    scrape_interval: 15s
    scrape_timeout: 10s
    file_sd_configs:
      - files: ['/usr/local/prometheus/groups/postgresql/*.json','/usr/local/prometheus/groups/postgresql/*.yml']
    metrics_path: /probe
    relabel_configs:
      # The discovered address (the database host:port) becomes ?target=...
      - source_labels: [__address__]
        target_label: __param_target
      # The target's `instance` label becomes ?auth_module=..., so it has to
      # match an auth_modules key in the exporter configuration.
      - source_labels: [instance]
        target_label: __param_auth_module
      # Finally point the scrape at the exporter itself.
      - target_label: __address__
        replacement: "10.0.18.2:13101"
prometheus/groups/postgresql/postgresql.yml
- targets: [ "10.0.18.2:5432"]
  labels:
    job_name: hdy-nmg-server-001
    instance: hdy-nmg-server-001
    saltid: hdy-nmg-server-001
    comment: "hdy-nmg-server-001"

- targets: [ "47.xxx.xxx.xxx:5432"]
  labels:
    job_name: aliyun-hz-server-master
    instance: aliyun-hz-server-master
    saltid: aliyun-hz-server-master
    comment: "aliyun-hz-server-master"

grafana的监控模板为9628

mysql监控

mysql_exporter

单个数据库

mysql_exporter.sh
#!/bin/bash
# Start a mysqld_exporter container for a single MySQL server
# (documented instance: hdy-nmg-server-001).
set -euo pipefail

port=10309
# NOTE(review): "exporte" looks like a typo for "exporter"; the name is kept
# unchanged so containers created by earlier runs are still matched.
name="mysqld-exporte-${port}"

# Remove any previous container; a missing container is not an error.
docker rm -f "${name}" >/dev/null 2>&1 || true

# Credentials file mounted into the container. The delimiter is quoted so a
# '$' in the password is written literally instead of being expanded.
cat << 'EOF' > .my.cnf
[client]
user=root
password=
EOF

# "$(pwd)" is quoted so a working directory containing spaces still mounts.
docker run -d --name "${name}" \
  --net host \
  --restart=always \
  -v /etc/timezone:/etc/timezone:ro \
  -v /usr/share/zoneinfo/Asia/Shanghai:/etc/localtime:ro \
  -v "$(pwd)/.my.cnf:/config/.my.cnf" \
  prom/mysqld-exporter:v0.15.0 \
  --web.listen-address=":${port}" \
  --config.my-cnf="/config/.my.cnf"
Prometheus配置增加
  # hdy-nmg-server-001
  - job_name: 'mysql'
    static_configs:
      - targets: ['127.0.0.1:10309']
        labels:
          type: mysql
          instance: hdy-nmg-server-001

多个数据库

mysql_exporter_setup.sh
#!/bin/bash
# Start one mysqld_exporter container that serves several MySQL servers
# (multi-target mode); credentials are kept per server in .my.cnf sections.
set -euo pipefail

port=10309
# NOTE(review): "exporte" looks like a typo for "exporter"; the name is kept
# unchanged so containers created by earlier runs are still matched.
name="mysqld-exporte-${port}"

# Remove any previous container; a missing container is not an error.
docker rm -f "${name}" >/dev/null 2>&1 || true

# One section per server. The documented Prometheus job passes each target's
# `instance` label as auth_module, so section names must match those labels.
# The delimiter is quoted so a '$' in a password is written literally.
cat << 'EOF' > .my.cnf
[hdy-nmg-server-001]
user=root
password=
[aliyun-hz-server-master]
user=root
password=
EOF

# "$(pwd)" is quoted so a working directory containing spaces still mounts.
docker run -d --name "${name}" \
  --net host \
  --restart=always \
  -v /etc/timezone:/etc/timezone:ro \
  -v /usr/share/zoneinfo/Asia/Shanghai:/etc/localtime:ro \
  -v "$(pwd)/.my.cnf:/config/.my.cnf" \
  prom/mysqld-exporter:v0.15.0 \
  --web.listen-address=":${port}" \
  --config.my-cnf="/config/.my.cnf"
Prometheus配置增加
  # Multi-target scrape job: targets are read from file_sd files, but every
  # scrape is actually sent to the exporter's /probe endpoint.
  - job_name: 'mysql-exporter'
    scrape_interval: 15s
    scrape_timeout: 10s
    file_sd_configs:
      - files: ['/usr/local/prometheus/groups/mysql/*.json','/usr/local/prometheus/groups/mysql/*.yml']
    metrics_path: /probe
    relabel_configs:
      # The discovered address (the MySQL host:port) becomes ?target=...
      - source_labels: [__address__]
        target_label: __param_target
      # The target's `instance` label becomes ?auth_module=..., so it has to
      # match a section name in the exporter's .my.cnf.
      - source_labels: [instance]
        target_label: __param_auth_module
      # Finally point the scrape at the exporter itself.
      - target_label: __address__
        replacement: "10.0.18.2:10309"
prometheus/groups/mysql/mysql.yml
- targets: [ "10.0.18.2:3306"]
  labels:
    job_name: hdy-nmg-server-001
    instance: hdy-nmg-server-001
    saltid: hdy-nmg-server-001
    comment: "hdy-nmg-server-001"

- targets: [ "47.xxx.xxx.xxx:3306"]
  labels:
    job_name: aliyun-hz-server-master
    instance: aliyun-hz-server-master
    saltid: aliyun-hz-server-master
    comment: "aliyun-hz-server-master"

grafana的监控模板为7362

kafka监控

kafka_exporter

单个集群

kafka_exporter.sh
#!/bin/bash
# (Re)start kafka_exporter on the host network for a three-broker cluster.
set -euo pipefail

# Listen port of the exporter. It was previously declared but never used;
# it is now passed explicitly so changing it here actually takes effect.
port=9308

# Remove any previous container; a missing container is not an error.
docker rm -f kafka_exporter >/dev/null 2>&1 || true

docker run -id \
  --restart=always \
  --network=host \
  --name=kafka_exporter \
  danielqsj/kafka-exporter \
  --web.listen-address=":${port}" \
  --kafka.server=1.0.0.1:9092 \
  --kafka.server=1.0.0.2:9092 \
  --kafka.server=1.0.0.3:9092
Prometheus配置增加
  - job_name: 'kafka_exporter'
    scrape_interval: 30s
    scrape_timeout: 10s
    static_configs:
      - targets: ['10.32.8.4:9308']

grafana的监控模板为7589

接口监控

blackbox_exporter

blackbox_exporter.sh
#!/bin/bash
# (Re)start blackbox_exporter, publishing port 9115 and reading its module
# definitions from ./config/blackbox.yml.
set -euo pipefail

# Stop gracefully, then remove; both may fail harmlessly when no container
# exists yet.
docker stop blackbox_exporter >/dev/null 2>&1 || true
docker rm blackbox_exporter >/dev/null 2>&1 || true

# "$(pwd)" is quoted so a working directory containing spaces still mounts.
docker run -d \
  --restart=always \
  -p 9115:9115 \
  --name blackbox_exporter \
  -v "$(pwd)/config:/config" \
  -v /etc/timezone:/etc/timezone:ro \
  -v /etc/localtime:/etc/localtime:ro \
  quay.io/prometheus/blackbox-exporter:latest --config.file=/config/blackbox.yml
config/blackbox.yml
modules:
  # 通用get
  http_2xx:
    prober: http
    timeout: 35s
    http:
      valid_http_versions: ["HTTP/1.1", "HTTP/2.0"]
      valid_status_codes: []  # Defaults to 2xx
      method: GET
      preferred_ip_protocol: "ip4" # defaults to "ip6"
      ip_protocol_fallback: false  # no fallback to "ip6"
  # 无参post
  http_post_2xx:
    prober: http
    timeout: 35s
    http:
      method: POST
      headers:
        Content-Type: application/json
Prometheus配置增加
  # Probe job: the URLs to check come from file_sd files; every scrape is sent
  # to blackbox_exporter's /probe endpoint.
  - job_name: 'blackbox-exporter'
    scrape_interval: 60s
    scrape_timeout: 35s
    file_sd_configs:
      - files: ['/usr/local/prometheus/groups/blackbox/*.json','/usr/local/prometheus/groups/blackbox/*.yml']
    metrics_path: /probe
    relabel_configs:
      # The discovered address (the URL to probe) becomes ?target=...
      - source_labels: [__address__]
        target_label: __param_target
      # The target's `module` label selects the blackbox module (?module=...).
      - source_labels: [module]
        target_label: __param_module
      # NOTE(review): copies the `address` label onto itself, which looks like
      # a no-op — confirm whether it can be dropped.
      - source_labels: [address]
        target_label: address
      # Finally point the scrape at the exporter itself.
      - target_label: __address__
        replacement: "127.0.0.1:9115"
prometheus/groups/blackbox/blackbox.yml
- targets: [ "http://10.0.18.2:8008/api/video_type/list" ]
  labels:
    module: "http_2xx"
    address: "http://10.0.18.2:8008/api/video_type/list"
    instance: "wx-video"

redis监控

redis_exporter

redis_exporter.sh
#!/bin/bash
# (Re)start redis_exporter, publishing it on ${port} and giving it a JSON
# file that maps Redis URIs to their passwords.
set -euo pipefail

port=9121

# Remove any previous container; a missing container is not an error.
docker rm -f redis-exporter >/dev/null 2>&1 || true

# "$(pwd)" is quoted so a working directory containing spaces still mounts.
docker run -d --name redis-exporter \
  --restart=always \
  -v "$(pwd)/config/redis_exporter.json:/data/redis_exporter.json" \
  -e REDIS_PASSWORD_FILE=/data/redis_exporter.json \
  -p "${port}:9121" \
  oliver006/redis_exporter:v1.60.0
config/redis_exporter.json
{
  "redis://192.168.1.1:6379": "password",
  "redis://192.168.1.1:6380": ""
}
prometheus/groups/redis/redis.yml
- targets:
    - 'redis://192.168.1.1:6379'
  labels:
    saltid: test
    redisid: redis1
    instance: redis1
    comment: 测试1

- targets:
    - 'redis://192.168.1.1:6380'
  labels:
    saltid: test
    redisid: redis2
    instance: redis2
    comment: 测试2
Prometheus配置增加
  # Multi-target scrape job: Redis URIs are read from file_sd files, but every
  # scrape is actually sent to redis_exporter's /scrape endpoint.
  - job_name: 'redis-exporter'
    scrape_interval: 15s
    scrape_timeout: 10s
    file_sd_configs:
      - files: ['/usr/local/prometheus/groups/redis/redis.json','/usr/local/prometheus/groups/redis/redis.yml']
    metrics_path: /scrape
    relabel_configs:
      # The discovered address (the redis:// URI) becomes ?target=...
      - source_labels: [__address__]
        target_label: __param_target
      # Finally point the scrape at the exporter itself.
      - target_label: __address__
        replacement: "192.168.1.1:9121"

grafana的监控模板为11835

haproxy监控

prometheus/groups/haproxy/haproxy.yml
- targets: [ "192.168.1.1:8405"]
  labels:
    job_name: ha-test-1
    instance: ha-test-1
    saltid: test
    comment: 测试
Prometheus配置增加
  - job_name: 'haproxy-exporter'
    scrape_interval: 15s
    scrape_timeout: 10s
    file_sd_configs:
      - files: ['/usr/local/prometheus/groups/haproxy/haproxy.json','/usr/local/prometheus/groups/haproxy/haproxy.yml']

grafana的监控模板为12693

pve监控

pve_exporter

pve_exporter.sh
#!/bin/bash
# (Re)start pve_exporter on the host network with its credentials file
# mounted at /etc/prometheus/pve.yml.
set -euo pipefail

# NOTE(review): not referenced below; presumably documents the exporter's
# listen port (the documented Prometheus target uses :9221).
port=9221

# Remove any previous container; a missing container is not an error.
docker rm -f pve-exporter >/dev/null 2>&1 || true

# Mount the exporter credentials (config/pve_exporter.yml). The previous mount
# source, groups/pve/pve.yml, is the Prometheus file_sd target list and holds
# no user/password for the exporter to log in with.
docker run -d \
  --name pve-exporter \
  --restart=always \
  --network=host \
  -v "$(pwd)/config/pve_exporter.yml:/etc/prometheus/pve.yml" \
  buyfakett/pve_exporter
config/pve_exporter.yml
default:
  user: root@pam
  password: password
  # Optional: set to false to skip SSL/TLS verification
  verify_ssl: false
Prometheus配置增加
  # Scrape Proxmox VE metrics through pve_exporter; the exporter addresses come
  # from file_sd files and the PVE host to query comes from each target's
  # `target` label.
  - job_name: 'pve_exporter'
    scrape_interval: 15s
    scrape_timeout: 10s
    # prometheus-pve-exporter serves the Proxmox metrics on /pve; the default
    # /metrics path only returns the exporter's own process metrics.
    # NOTE(review): assumes the buyfakett/pve_exporter image keeps the upstream
    # endpoints — confirm.
    metrics_path: /pve
    file_sd_configs:
      - files: ['/usr/local/prometheus/groups/pve/pve.json','/usr/local/prometheus/groups/pve/pve.yml']
    relabel_configs:
      # The `target` label becomes the ?target=... query parameter.
      - source_labels: [target]
        target_label: __param_target
      # Show the PVE host, not the exporter address, as the instance.
      - source_labels: [__param_target]
        target_label: instance
prometheus/groups/pve/pve.yml
- targets: [ "172.168.1.1:9221"]
  labels:
    module: default
    cluster: dc-cn-sy-pve-003
    node: dc-cn-sy-pve-003
    target: '172.168.1.1'

nginx监控

使用buyfakett/openresty:1.27.1.2-vts_0.2.5-ngx_waf_10.1.2镜像

Prometheus配置增加
  - job_name: 'nginx-host-exporter'
    scrape_interval: 15s
    scrape_timeout: 10s
    metrics_path: '/status/format/prometheus'
    file_sd_configs:
      - files: ['/usr/local/prometheus/groups/nginx-host/*.json','/usr/local/prometheus/groups/nginx-host/*.yml']
prometheus/groups/nginx-host/nginx.yml
- targets: [ "192.168.1.1:9145"]
  labels:
    job_name: nginx
    instance: nginx
    app: nginx
    comment: "test"

grafana的监控模板为9785

linux监控

node_exporter

Prometheus配置增加
  - job_name: 'node-exporter'
    scrape_interval: 15s
    scrape_timeout: 10s
    file_sd_configs:
      - files: ['/usr/local/prometheus/groups/node_exporter/*.json','/usr/local/prometheus/groups/node_exporter/*.yml']
prometheus/groups/node_exporter/nodes.yml
- targets: [ "192.168.1.1:9100"]
  labels:
    job_name: server-001
    instance: server-001
    comment: "node-001"

grafana的监控模板为1860

windows监控

windows_exporter

exporter安装方法

Prometheus配置增加
  - job_name: 'windows-exporter'
    scrape_interval: 30s
    scrape_timeout: 10s
    file_sd_configs:
      - files: ['/usr/local/prometheus/groups/windows_exporter/*.json','/usr/local/prometheus/groups/windows_exporter/*.yml']
prometheus/groups/windows_exporter/windows.yml
- targets: [ "127.0.0.1:9182"]
  labels:
    job_name: test-server-001
    instance: test-server-001
    saltid: test-server-001

grafana的监控模板为20763

hetzner storagebox监控

hetzner_storagebox_exporter

hetzner_storagebox_exporter.sh
#!/bin/bash
# (Re)start the Hetzner Storage Box exporter on the host network.
# Environment:
#   HETZNER_TOKEN - API token; when unset, the placeholder below is used and
#                   must be replaced by hand.
set -euo pipefail

name=storagebox-exporter

# Remove any previous container; a missing container is not an error.
docker rm -f "${name}" >/dev/null 2>&1 || true

# --restart=always matches the other exporters in this document, so the
# container comes back after a daemon restart or reboot.
docker run -d \
  --name "${name}" \
  --restart=always \
  --network=host \
  -e HETZNER_TOKEN="${HETZNER_TOKEN:-your-api-token}" \
  ghcr.io/crstian19/prometheus-storagebox-exporter:latest
Prometheus配置增加
  - job_name: 'storagebox-exporter'
    scrape_interval: 5m
    scrape_timeout: 30s
    static_configs:
      - targets: ['localhost:9509']

grafana的监控模板为24537

数据库监控

sql_exporter

可以监控数据库是否有新数据

sql_exporter.sh
#!/bin/bash
# (Re)start sql_exporter on the host network, reading its configuration from
# ./config/sql_exporter.yml.
# https://github.com/burningalchemist/sql_exporter
set -euo pipefail

port=9399

# Remove any previous container; a missing container is not an error.
docker rm -f sql_exporter >/dev/null 2>&1 || true

mkdir -p config/

# Fixes compared with the previous version:
#  - /etc/timezone was misspelled as /etc/tiemzone, which makes Docker create
#    an empty directory on the host and mount that instead of the real file.
#  - the config mount uses an absolute path, because relative bind-mount
#    sources (./config/) are rejected by older Docker releases.
docker run -d --net host \
  --name sql_exporter \
  --restart=always \
  -v /etc/localtime:/etc/localtime:ro \
  -v /etc/timezone:/etc/timezone:ro \
  -v "$(pwd)/config/:/etc/sql_exporter/" \
  registry.cn-hangzhou.aliyuncs.com/buyfakett/sql_exporter \
  -config.file /etc/sql_exporter/sql_exporter.yml \
  -web.listen-address ":${port}"
config/sql_exporter.yml
# Global settings and defaults.
global:
  scrape_timeout_offset: 500ms
  min_interval: 0s
  max_connections: 3
  max_idle_connections: 3
  max_connection_lifetime: 10m

# The target to monitor and the list of collectors to execute on it.
target:
  name: "prices_db"
  data_source_name: 'postgres://postgres:password@192.168.1.1:5432/gps?sslmode=disable'
  collectors: [moto_data]

# Collector definition files.
collector_files:
  - "collectors/*.collector.yml"
config/collectors/moto_data.collector.yml
# Collector that reports whether trip_information_29 received any row during
# the last five minutes (1 = yes, 0 = no).
collector_name: moto_data

metrics:
  - metric_name: moto_data
    # A 0/1 flag goes up and down, so it is a gauge. Declared as a counter,
    # every 1 -> 0 transition would be interpreted as a counter reset.
    type: gauge
    help: '5分钟内,有没有新数据'
    static_labels:
      database_id: esxi-pg-001
    # Name of the result column that carries the metric value.
    values: [counter]
    query: |
      SELECT CASE
           WHEN EXISTS (
                  SELECT 1
                  FROM trip_information_29
                  WHERE createtime >= NOW() - INTERVAL '5 minutes'
              ) THEN 1
           ELSE 0
       END AS counter;
Prometheus配置增加
  - job_name: 'sql_exporter'
    scrape_interval: 15s
    scrape_timeout: 10s
    metrics_path: /metrics
    static_configs:
      - targets: ['192.168.1.1:9399']

检测表是否5分钟没有新增数据(查询结果用1或0显示)

postgres
mysql
clickhouse
SELECT CASE
WHEN EXISTS (
SELECT 1
FROM trip_information_29
WHERE createtime >= NOW() - INTERVAL '5 minutes'
) THEN 1
ELSE 0
END AS counter;