随笔- 32  评论- 4  文章- 0 

Prometheus+Alertmanager+Grafana监控组件容器化部署

直接上部署配置文件

docker-compose.yml

# Compose stack: Prometheus, Grafana, node-exporter and Alertmanager.
# All services join the user-defined bridge network `monitor`, so they can
# reach each other by service name (e.g. `alertmanager:9093`).
version: '3'

networks:
  monitor:
    driver: bridge

services:
  prometheus:
    image: prom/prometheus
    container_name: prometheus
    hostname: prometheus
    restart: always
    volumes:
      # Main configuration, TSDB storage, and alerting-rule directory.
      - /data/monitor/prometheus.yml:/etc/prometheus/prometheus.yml
      - /data/monitor/prom_db:/prometheus
      - /data/monitor/prom_rules:/etc/prometheus/rules
    ports:
      - "9090:9090"
    networks:
      - monitor

  grafana:
    image: grafana/grafana
    container_name: grafana
    hostname: grafana
    restart: always
    volumes:
      - /data/monitor/ga_data:/var/lib/grafana
    environment:
      # NOTE(review): sample admin password in plain text; replace it (or
      # inject it from a secret / env file) before any real deployment.
      - GF_SECURITY_ADMIN_PASSWORD=admin@123
    ports:
      - "3000:3000"
    networks:
      - monitor

  node-exporter:
    image: quay.io/prometheus/node-exporter
    container_name: node-exporter
    hostname: node-exporter
    restart: always
    # node-exporter is meant to monitor the HOST. Without the host root
    # filesystem and PID namespace it only sees the container's own view,
    # so host mountpoints such as /logs, /backup or /home never show up in
    # node_filesystem_* metrics.
    pid: host
    command:
      - '--path.rootfs=/host'
    volumes:
      - '/:/host:ro,rslave'
    ports:
      - "9100:9100"
    networks:
      - monitor

  alertmanager:
    image: prom/alertmanager
    container_name: alertmanager
    hostname: alertmanager
    restart: always
    volumes:
      - /data/monitor/alertmanager.yml:/etc/alertmanager/alertmanager.yml
    ports:
      - "9093:9093"
    networks:
      - monitor

prometheus.yml

# Prometheus server configuration.
# Prometheus runs inside its own container, so `localhost` refers to the
# Prometheus container itself. Other services must be addressed by their
# compose service name on the shared `monitor` network.
global:
  scrape_interval: 60s
  evaluation_interval: 60s

scrape_configs:
  # Prometheus scraping its own metrics endpoint (same container).
  - job_name: prom-server
    static_configs:
      - targets: ['localhost:9090']
        labels:
          instance: prometheus

  # Host metrics, served by the `node-exporter` service.
  - job_name: "node_exporter"
    static_configs:
      - targets:
          - 'node-exporter:9100'

alerting:
  alertmanagers:
    - static_configs:
        # Was "loclhost:9093": a typo, and localhost would not reach the
        # Alertmanager container anyway.
        - targets: ['alertmanager:9093']

rule_files:
  - /etc/prometheus/rules/rules.yml

alertmanager.yml

# Alertmanager configuration: every alert is mailed to a single receiver.
global:
  # Outgoing SMTP relay and credentials (placeholders; fill in real values).
  smtp_smarthost: "smtp.126.com:25"
  smtp_from: "XXX@126.com"
  smtp_auth_username: "XXXXX"
  smtp_auth_password: "XXXXX"
  smtp_require_tls: false

# Root route: no child routes, so all alerts go to the default receiver.
route:
  receiver: default-receiver
  group_by:
    - alertname
    - cluster
    - service
  group_wait: 10s
  group_interval: 5m
  repeat_interval: 3h

receivers:
  - name: default-receiver
    email_configs:
      - to: 'battlescars@qq.com'
        send_resolved: true
        require_tls: false

rules.yml(存放于 /data/monitor/prom_rules/ 目录,对应 prometheus.yml 中的 rule_files 路径)

# Alerting rules loaded by Prometheus via `rule_files`.
groups:
  - name: response-rule
    rules:
      # Fires when used space on any of the selected mountpoints exceeds 75%.
      - alert: NodeDiskUsageException
        # Folded scalar: the line breaks below collapse to single spaces, so
        # the expression is the same one-line PromQL string.
        expr: >-
          (1 - (node_filesystem_free_bytes{mountpoint=~"/|/logs|/backup|/home"}
          / node_filesystem_size_bytes{mountpoint=~"/|/logs|/backup|/home"}))
          * 100 > 75
        labels:
          ai_mon: node
        annotations:
          summary: '磁盘占用超标'
          description: '服务器{{$labels.instance}}磁盘{{$labels.device}}空间占用比例为{{$value}}%, 大于阈值75%'
posted on 2020-01-15 19:29  BattleScars  阅读(...)  评论(...编辑  收藏