使用 docker-compose 部署 Prometheus + Grafana + Alertmanager
docker-compose.yml 文件内容如下(可通过 `cat docker-compose.yml` 查看):
# Compose stack: Prometheus, Alertmanager and Grafana.
version: "2"

services:
  # Prometheus server.
  prometheus:
    # Image pulled from Docker Hub.
    image: prom/prometheus:latest
    # Name of the created container.
    container_name: Myprometheus
    # Hostname inside the container.
    hostname: prometheus
    # Always restart the container (including after a daemon restart).
    restart: always
    # host:container port mapping.
    ports:
      - "9090:9090"
    volumes:
      # Host config folder mounted at /config inside the container.
      - "./prometheus/config:/config"
      # Host directory mounted at /prometheus/data (intended for server data).
      - "./prometheus/data/prometheus:/prometheus/data"
    command:
      # Configuration file to load, read from the /config mount above.
      - "--config.file=/config/prometheus.yml"
      # Allow configuration hot reload.
      - "--web.enable-lifecycle"

  # Alerting component.
  alertmanager:
    image: prom/alertmanager:latest
    container_name: Myalertmanager
    hostname: alertmanager
    restart: always
    ports:
      - "9093:9093"
    volumes:
      # Same host config folder as Prometheus, mounted at /config.
      - "./prometheus/config:/config"
      # Host directory mounted at /alertmanager/data.
      - "./prometheus/data/alertmanager:/alertmanager/data"
    command:
      - "--config.file=/config/alertmanager.yml"

  # Dashboard / visualization front end.
  grafana:
    image: docker.xxxxx.com/library/grafana:v20191225
    container_name: Mygrafana
    hostname: grafana
    restart: always
    ports:
      - "3000:3000"
    volumes:
      # grafana.ini from the host; used to configure Grafana's mail server.
      - "./grafana/config/grafana.ini:/etc/grafana/grafana.ini"
      # Host directory mounted at /var/lib/grafana.
      - "./grafana/data/grafana:/var/lib/grafana"
prometheus.yml 配置文件内容如下:
# Prometheus server configuration: global timing, Alertmanager wiring,
# rule files and the list of scrape jobs.
global:
  # Scrape targets every 15 seconds. Default is every 1 minute.
  scrape_interval: 15s
  # Evaluate rules every 15 seconds. The default is every 1 minute.
  evaluation_interval: 15s
  # scrape_timeout is set to the global default (10s).

# Alertmanager configuration.
alerting:
  alertmanagers:
    - static_configs:
        - targets:
            # Deliver alerts to the Alertmanager service from the
            # docker-compose.yml above (hostname "alertmanager", port 9093).
            # Previously this entry was commented out, leaving the target
            # list empty so no alert could ever reach Alertmanager.
            - 'alertmanager:9093'

# Load rules once and periodically evaluate them according to the global
# 'evaluation_interval'. No rule files are enabled yet.
rule_files:
  # - "first_rules.yml"
  # - "second_rules.yml"

# Scrape configurations. The job name is added as a label `job=<job_name>`
# to any timeseries scraped from that job.
scrape_configs:
  # Prometheus scraping itself.
  # NOTE(review): 'promethus' looks like a typo for 'prometheus'. It is kept
  # unchanged because renaming changes the `job` label on existing series.
  - job_name: 'promethus'
    static_configs:
      - targets: ['192.168.1.100:9090']
        labels:
          instance: 'Monitor-Service-01'
          platform: 'master'

  # Single target on port 9100, labelled as a worker.
  - job_name: 'system'
    static_configs:
      - targets: ['192.168.1.101:9100']
        labels:
          # NOTE(review): contains a space, unlike 'Monitor-Service-01'
          # above; confirm whether 'Monitor-Service-01' was intended.
          instance: 'Monitor Service-01'
          platform: 'worker'

  # One static_config per server so each target gets its own instance label.
  - job_name: 'Server'
    static_configs:
      - targets: ['192.168.2.100:9100']
        labels:
          instance: 'Service-web01'
          platform: 'esc01'
      - targets: ['192.168.2.101:9101']
        labels:
          instance: 'Service-db01'
          platform: 'esc01'
      - targets: ['192.168.2.102:9102']
        labels:
          instance: 'Service-db02'
          platform: 'esc01'
      - targets: ['192.168.2.103:9103']
        labels:
          instance: 'Service-dbpool'
          platform: 'esc01'
alertmanager.yml 配置文件内容如下:
# Alertmanager configuration: global settings, the root route and receivers.
global:
  resolve_timeout: 1m
  # Smarthost and SMTP sender for mail notifications (all disabled here).
  # smtp_smarthost: ''
  # smtp_from: ''
  # smtp_auth_username: ''
  # smtp_auth_password: ''

route:
  # Receiver used by this route; must match a name under `receivers`.
  receiver: "default-receiver"

  # Labels by which incoming alerts are grouped together. For example,
  # several alerts arriving for cluster=A and alertname=LatencyHigh would
  # be batched into one group.
  # group_by: ['alertname']

  # When an incoming alert creates a new group, wait at least 'group_wait'
  # before sending the initial notification, so that alerts for the same
  # group that start firing shortly after one another are batched into
  # the first notification.
  # group_wait: 5s

  # After the first notification was sent, wait 'group_interval' before
  # sending a batch of new alerts that started firing for that group.
  # group_interval: 30s

  # If an alert has successfully been sent, wait 'repeat_interval' before
  # resending it.
  repeat_interval: 1m

receivers:
  # No notification integrations are configured for this receiver.
  - name: "default-receiver"

浙公网安备 33010602011771号