Docker部署Prometheus+dingtalk+alertmanager告警

准备工作

拉取镜像

docker pull prom/node-exporter
docker pull grafana/grafana 
docker pull prom/prometheus 
docker pull prom/alertmanager 
docker pull timonwong/prometheus-webhook-dingtalk

一、安装node-exporter

docker run -d -p 9100:9100 --name node-exporter -v /home/node-exporter/proc:/host/proc:ro -v /home/node-exporter/sys:/host/sys:ro -v /home/node-exporter/:/rootfs:ro prom/node-exporter

http://192.168.0.201:9100/

二、安装Prometheus

1、配置prometheus.yml文件

[root@node1 prometheus]# vi /home/prometheus/opt/prometheus/prometheus.yml
global:
  scrape_interval:     60s
  evaluation_interval: 60s
scrape_configs:
  - job_name: prometheus
    static_configs:
      - targets: ['192.168.0.201:9190'] # 采集prometheus自身的指标数据
        labels:
          instance: prometheus
  - job_name: linux
    static_configs:
      - targets: ['192.168.0.201:9100'] # 采集本机(node-exporter)的指标数据
        labels:
          instance: localhost

2、启动

docker run -d -p 9190:9090 --name prometheus -v /home/prometheus/opt/prometheus:/etc/prometheus prom/prometheus

三、启动webhook-dingtalk

docker run -d -p 8060:8060 --name webhook timonwong/prometheus-webhook-dingtalk

1、查询webhook容器的文件系统目录(MergedDir),以便在宿主机上修改容器内的配置文件

[root@node1 prometheus]# docker inspect webhook|grep Dir

2、进入MergedDir目录

[root@node1 prometheus]# cd /home/docker/overlay2/52933d06cee0e5135bf0bfc5907a581f8b19a480688af08d1c2b7ff2a8239e96/merged

3、进入配置文件夹

[root@node1 merged]# cd etc/prometheus-webhook-dingtalk/

4、修改配置文件

[root@node1 prometheus-webhook-dingtalk]# vi config.yml
## Request timeout
# timeout: 5s

## Uncomment following line in order to write template from scratch (be careful!)
#no_builtin_template: true

## Customizable templates path
#templates:
#  - contrib/templates/legacy/template.tmpl

## You can also override default template using `default_message`
## The following example to use the 'legacy' template from v0.3.0
#default_message:
#  title: '{{ template "legacy.title" . }}'
#  text: '{{ template "legacy.content" . }}'

## Targets, previously was known as "profiles"
targets:
  webhook1:
    url: https://oapi.dingtalk.com/robot/send?access_token=ed49*************************7239514b010270f
    # secret for signature
    secret: SEC**********************
  webhook2:
    url: https://oapi.dingtalk.com/robot/send?access_token=xxxxxxxxxxxx
  webhook_legacy:
    url: https://oapi.dingtalk.com/robot/send?access_token=xxxxxxxxxxxx
    # Customize template content
    message:
      # Use legacy template
      title: '{{ template "legacy.title" . }}'
      text: '{{ template "legacy.content" . }}'
  webhook_mention_all:
    url: https://oapi.dingtalk.com/robot/send?access_token=xxxxxxxxxxxx
    mention:
      all: true
  webhook_mention_users:
    url: https://oapi.dingtalk.com/robot/send?access_token=xxxxxxxxxxxx
    mention:
      mobiles: ['156xxxx8827', '189xxxx8325']
~

5、重启webhook

[root@node1 prometheus-webhook-dingtalk]# docker restart webhook

6、查看webhook日志

[root@node1 prometheus-webhook-dingtalk]# docker logs webhook

从日志中复制webhook1的地址,后面配置alertmanager时会用到。

四、启动alertmanager

1、创建alertmanager.yml映射

[root@node1 /]# sudo mkdir /home/alertmanager/opt/alertmanager/
[root@node1 /]# cd /home/alertmanager/opt/alertmanager/
[root@node1 alertmanager]# vi alertmanager.yml
global:
  resolve_timeout: 5m

route: # 告警路由配置,定义如何处理和发送告警
  receiver: webhook
  group_wait: 30s
  group_interval: 1m
  repeat_interval: 4h
  group_by: [alertname]
  routes:
  - receiver: webhook
    group_wait: 10s

receivers: # 告警接收者配置,定义如何处理和发送告警
- name: webhook
  webhook_configs:
  - url: http://192.168.0.201:8060/dingtalk/webhook1/send  # 告警 Webhook URL
    send_resolved: true # 是否发送已解决的告警。如果设置为 true,则在告警解决时发送通知

注意:webhook的url就是前面webhook启动日志里打印的地址,将其中的localhost改成服务器对应的ip即可。

2、修改prometheus.yml文件

[root@node1 alertmanager]# vi /home/prometheus/opt/prometheus/prometheus.yml
global:
  scrape_interval:     60s
  evaluation_interval: 60s
# Alertmanager配置
alerting:
  alertmanagers:
    - static_configs:
        - targets: ["192.168.0.201:9093"]
# rule配置
rule_files:
  - "/etc/prometheus/rules.yml"
scrape_configs:
  - job_name: prometheus
    static_configs:
      - targets: ['192.168.0.201:9190'] # 采集prometheus自身的指标数据
        labels:
          instance: prometheus
  - job_name: linux
    static_configs:
      - targets: ['192.168.0.201:9100'] # 采集本机(node-exporter)的指标数据
        labels:
          instance: localhost

3、启动

docker run -d -p 9093:9093 -v /home/alertmanager/opt/alertmanager/:/etc/alertmanager/ --name alertmanager prom/alertmanager

五、配置预警规则

[root@node1 alertmanager]# vi /home/prometheus/opt/prometheus/rules.yml
groups:
  - name: host_monitoring
    rules:
      - alert: 内存报警
        expr: netdata_system_ram_MiB_average{chart="system.ram",dimension="free",family="ram"} < 800
        for: 2m
        labels:
          team: node
        annotations:
          Alert_type: 内存报警
          Server: '{{$labels.instance}}'
          #summary: "{{$labels.instance}}: High Memory usage detected"
          explain: "内存使用量超过90%,目前剩余量为:{{ $value }}M"
          #description: "{{$labels.instance}}: Memory usage is above 80% (current value is: {{ $value }})"
      - alert: CPU报警
        expr: netdata_system_cpu_percentage_average{chart="system.cpu",dimension="idle",family="cpu"} < 20
        for: 2m
        labels:
          team: node
        annotations:
          Alert_type: CPU报警
          Server: '{{$labels.instance}}'
          explain: "CPU使用量超过80%,目前剩余量为:{{ $value }}"
          #summary: "{{$labels.instance}}: High CPU usage detected"
          #description: "{{$labels.instance}}: CPU usage is above 80% (current value is: {{ $value }})"
      - alert: 磁盘报警
        expr: netdata_disk_space_GiB_average{chart="disk_space._",dimension="avail",family="/"} < 4
        for: 2m
        labels:
          team: node
        annotations:
          Alert_type: 磁盘报警
          Server: '{{$labels.instance}}'
          explain: "磁盘使用量超过90%,目前剩余量为:{{ $value }}G"
      - alert: 服务告警
        expr: up == 0
        for: 2s
        labels:
          team: node
        annotations:
          Alert_type: 服务报警
          summary: 'instance {{$labels.instance}} down'
          description: "netdata服务已关闭"

六、重启prometheus

[root@node1 alertmanager]# docker restart prometheus

七、测试

[root@node1 alertmanager]# docker stop node-exporter

ps:通知内容待优化。。。

八、启动Grafana

docker run -d -p 3000:3000 --name=grafana grafana/grafana

admin/admin

第一次登录需要修改密码

1、设置数据源

2、设置dashboards

填入dashboard的ID(例如Node Exporter Full的ID为1860),点击Load,即可下载Node-Exporter的dashboard

参考文章:

docker部署prometheus+grafana+alertmanager+dingtalk实现钉钉告警_prometheus钉钉告警_生夏夏夏的博客-CSDN博客

docker部署prometheus告警钉钉通知_POI操作Excel的技术博客_51CTO博客

posted @ 2023-09-28 09:01  小强找BUG  阅读(244)  评论(0)    收藏  举报