Grafana v11.1.3 配置告警规则并配置飞书告警

流程

  1. 启动webhook服务
  2. 创建飞书告警群
  3. 添加自定义机器人
  4. 配置联络点
  5. 配置Grafana内置Alertmanager
  6. 配置大屏
  7. 配置规则
  8. 告警通知

编写webhook服务,并启动它

相关代码

send_feishu.py

from flask import Flask, request, jsonify
import json
import yaml
import sys
import os

# Flask application object; the webhook route defined later in this file is registered on it.
app = Flask(__name__)
import json
import datetime
import requests
from urllib.parse import urlparse, urlunparse


# utc转cst
def utc2cst(utc_time_str):
    """Convert a timestamp string to China Standard Time (UTC+8).

    Two input shapes are understood:

    * ``YYYY-mm-ddTHH:MM:SSZ`` -- treated as UTC and shifted by +8 hours
      (the original behaviour of this function).
    * RFC 3339 style timestamps that carry fractional seconds and/or an
      explicit offset, e.g. ``2024-08-01T02:00:00.123456789Z`` or
      ``2024-08-01T10:00:00+08:00``. These are converted from their stated
      offset to UTC+8.

    Args:
        utc_time_str: Timestamp text to convert.

    Returns:
        The time formatted as ``YYYY-mm-dd HH:MM:SS`` in UTC+8, or the input
        string unchanged when it cannot be parsed (for example the
        ``0000-00-00T00:00:00Z`` placeholder used by the caller).
    """
    output_format = "%Y-%m-%d %H:%M:%S"
    cst_offset = datetime.timedelta(hours=8)

    # Fast path: plain second-resolution UTC timestamp.
    try:
        utc_time = datetime.datetime.strptime(utc_time_str, "%Y-%m-%dT%H:%M:%SZ")
    except ValueError:
        pass
    else:
        return (utc_time + cst_offset).strftime(output_format)

    # Fallback: fractional seconds and/or explicit offset.
    head, dot, tail = utc_time_str.partition('.')
    if dot:
        # %f accepts at most 6 digits, so longer fractions are truncated
        # before parsing; whatever follows the digits is the offset part.
        digit_count = len(tail) - len(tail.lstrip('0123456789'))
        candidate = f"{head}.{tail[:min(digit_count, 6)]}{tail[digit_count:]}"
        input_format = "%Y-%m-%dT%H:%M:%S.%f%z"
    else:
        candidate = utc_time_str
        input_format = "%Y-%m-%dT%H:%M:%S%z"

    try:
        aware_time = datetime.datetime.strptime(candidate, input_format)
        cst_time = aware_time.astimezone(datetime.timezone(cst_offset))
    except (ValueError, OverflowError):
        # Unparseable text, or a date that leaves the supported range once
        # shifted: hand the input back untouched, as the strict path does.
        return utc_time_str
    return cst_time.strftime(output_format)


def alarm_label_setting(k, v):
    """Build one grey two-column card row showing a key next to its value.

    Args:
        k: Text for the left column; rendered as bold markdown.
        v: Value for the right column; rendered as plain text via an f-string.

    Returns:
        A ``column_set`` dict with two equally weighted columns.
    """

    def _column(vertical_align, cell):
        # Both columns share every setting except alignment and content.
        return {
            'tag': 'column',
            'width': 'weighted',
            'weight': 1,
            'vertical_align': vertical_align,
            'elements': [cell],
        }

    name_cell = {'tag': 'markdown', 'content': f'**{k}**'}
    value_cell = {'tag': 'div', 'text': {'content': f'{v}', 'tag': 'plain_text'}}

    return {
        'tag': 'column_set',
        'flex_mode': 'none',
        'background_style': 'grey',
        'columns': [_column('center', name_cell), _column('top', value_cell)],
    }


def _with_grafana_host(url, grafana_host):
    """Return ``url`` with its network location replaced by ``grafana_host``."""
    # ``or ''`` keeps urlparse working on str when the payload field is
    # missing or null; a None input would otherwise yield components that
    # cannot be combined with the str host.
    return urlunparse(urlparse(url or '')._replace(netloc=grafana_host))


def _alert_rows(alert, shown_labels, unit, resolved):
    """Build the card rows for a single alert, ending with a divider."""
    rows = []

    # Only labels that were explicitly selected are displayed.
    for k, v in (alert.get('labels') or {}).items():
        if k in shown_labels:
            rows.append(alarm_label_setting(k, v))

    # Annotations: for resolved alerts the 'state' entry is skipped and the
    # remaining keys are prefixed; for other statuses they are shown as-is.
    for k, v in (alert.get('annotations') or {}).items():
        if not resolved:
            rows.append(alarm_label_setting(k, v))
        elif k != 'state':
            rows.append(alarm_label_setting(f"annotations.{k}", v))

    # Current value: only the entry keyed 'C' is displayed. Best effort --
    # a missing/null 'values' field or a non-numeric value is logged, not fatal.
    try:
        for k, v in alert['values'].items():
            if k == 'C':
                rows.append(alarm_label_setting("当前状态值", f"{round(v, 2)}{unit}"))
    except (AttributeError, KeyError, TypeError) as e:
        print('告警值处理异常', e)

    # Resolved alerts additionally show when the alert started and ended.
    if resolved:
        starts_at = utc2cst(alert.get('startsAt', '0000-00-00T00:00:00Z'))
        ends_at = utc2cst(alert.get('endsAt', '0000-00-00T00:00:00Z'))
        rows.append(alarm_label_setting("startsAt", starts_at))
        rows.append(alarm_label_setting("endsAt", ends_at))

    rows.append({"tag": "hr"})
    return rows


def feishu_alert_template(data, webhook_url, labels, grafana_host):
    """Render an alert notification as a Feishu interactive card and POST it.

    The card consists of, in order: a summary row (current time and the
    ``grafana_folder`` group label), a divider, a heading, one group of rows
    per alert (see ``_alert_rows``), and a row of four link buttons whose
    URLs point at ``grafana_host``.

    Args:
        data: Parsed notification payload (dict). Keys read here:
            ``status``, ``groupLabels``, ``commonLabels`` and ``alerts``.
            As a side effect ``data['printf_today']`` is set to the current
            local time string.
        webhook_url: URL the card is POSTed to.
        labels: Names of the alert labels to display. The list is not
            modified; when ``status`` is ``'resolved'`` the label ``'时间'``
            is additionally displayed for that call only.
        grafana_host: ``host[:port]`` substituted into the silence, query and
            panel URLs of the first alert and used for the rule-list link.

    Returns:
        The ``requests`` response object of the POST.

    Raises:
        requests.RequestException: If the POST fails or exceeds the timeout.
    """
    group_labels = data.get('groupLabels') or {}
    common_labels = data.get('commonLabels') or {}
    alerts = data.get('alerts') or []
    resolved = data.get('status') == 'resolved'

    grafana_folder = group_labels.get('grafana_folder', '')
    level = group_labels.get("level", '')
    unit = common_labels.get("unit", '')
    # Fall back to the common labels so a grouping without 'alertname'
    # does not abort the notification.
    alertname = group_labels.get("alertname") or common_labels.get("alertname", '')

    printf_today = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    data['printf_today'] = printf_today

    # Work on a copy: the caller passes the shared configuration list, and
    # appending to it would grow it on every resolved notification.
    shown_labels = list(labels or [])
    if resolved:
        shown_labels.append('时间')

    # Header colour and title depend on status and severity level.
    count = len(alerts)
    if resolved:
        template = 'green'
        title = f"😁告警恢复-(数量:{count})-{alertname}"
    else:
        level_styles = {
            '警告': ('yellow', '😢'),
            '灾难': ('red', '😢😢😢'),
            '严重': ('red', '😢😢'),
        }
        template, prefix = level_styles.get(level, ('blue', ''))
        title = f"{prefix}{level}(数量:{count})-{alertname}"

    # Button links are taken from the first alert and re-pointed at grafana_host.
    first_alert = alerts[0] if alerts else {}
    buttons = [
        ("快速设置告警沉默", _with_grafana_host(first_alert.get('silenceURL'), grafana_host), "primary"),
        ("修改告警规则", f"http://{grafana_host}/alerting/list?search={alertname}", "default"),
        ("查询告警规则", _with_grafana_host(first_alert.get('generatorURL'), grafana_host), "default"),
        ("查看告警大屏", _with_grafana_host(first_alert.get('panelURL'), grafana_host), "default"),
    ]
    action_row = {
        "tag": "action",
        "actions": [
            {
                "tag": "button",
                "text": {"tag": "plain_text", "content": text},
                "url": url,
                "type": kind,
            }
            for text, url, kind in buttons
        ],
    }

    summary_row = {
        'tag': 'column_set', 'flex_mode': 'none', 'background_style': 'grey',
        'columns': [
            {'tag': 'column', 'width': 'weighted', 'weight': 1, 'vertical_align': 'top', 'elements': [
                {'tag': 'markdown', 'content': f'**告警时间**\n{printf_today}'}]},
            {'tag': 'column', 'width': 'weighted', 'weight': 1, 'vertical_align': 'top', 'elements': [
                {'tag': 'markdown', 'content': f'**告警分组**\n{grafana_folder}'}]},
        ],
    }

    elements = [
        summary_row,
        {"tag": "hr"},
        {"tag": "markdown", "content": "**告警项**"},
    ]
    for alert in alerts:
        elements.extend(_alert_rows(alert, shown_labels, unit, resolved))
    elements.append(action_row)

    feishu_body = {
        "elements": elements,
        "header": {
            "template": template,
            "title": {
                "content": title,
                "tag": "plain_text"
            }
        }
    }

    body = {
        "msg_type": "interactive",
        "card": feishu_body
    }
    # A timeout keeps an unresponsive endpoint from blocking the request handler forever.
    response = requests.request(method='POST', json=body, url=webhook_url, timeout=10)
    return response


# Load settings from config.yml located in the current working directory.
config_path = f'{os.getcwd()}/config.yml'
with open(config_path, encoding='utf-8') as config_file:
    config = yaml.safe_load(config_file)


@app.route('/webhook/feishu/', methods=['POST'])
def receive_data():
    """Receive an alert notification and forward it to Feishu as a card.

    The JSON request body is the alert payload; the target webhook URL is
    taken from the ``webhook`` query parameter. The displayed labels and the
    Grafana host come from the loaded configuration.

    Returns:
        ``{"status": "success"}`` once the card has been posted, or a 400
        JSON error when the ``webhook`` parameter is missing or the body is
        not a JSON object.
    """
    data = request.get_json()
    webhook = request.args.get('webhook')
    # Log the raw payload for troubleshooting.
    print(json.dumps(data, ensure_ascii=False))

    if not webhook:
        return jsonify({"status": "error", "message": "missing 'webhook' query parameter"}), 400
    if not isinstance(data, dict):
        return jsonify({"status": "error", "message": "request body must be a JSON object"}), 400

    # NOTE(security): the target URL is supplied by the caller, so this
    # service will POST to any address it is given. Expose it only to
    # trusted clients, or restrict the accepted webhook hosts.
    labels = config.get('labels')
    print(feishu_alert_template(data, webhook, labels=labels, grafana_host=config.get('grafana_host')))
    return jsonify({"status": "success"})


if __name__ == "__main__":
    port = config.get('port', 8000)
    # Listen on every network interface. Because the service is reachable
    # from other hosts, Flask's debug mode (which enables the interactive
    # debugger) is off unless config.yml explicitly sets ``debug: true``.
    app.run(host='0.0.0.0', port=port, debug=bool(config.get('debug', False)))

config.yml

# Alert labels to display on the card; labels not listed here are omitted.
labels:
  - 'FIRM'
  - 'host_type'
  - 'hostname'
  - 'device'
  - 'ip'
  - 'env'
  - 'pod'
  - 'topic'
  - 'consumergroup'
  - 'namespace'
  - mountpoint
  - startsAt
  - endsAt

# host:port substituted into the links shown on the card's buttons.
grafana_host: '192.168.10.76:3000'
# Port the webhook service listens on.
port: 8081

requirements.txt

flask
requests
PyYAML

将config.yml与send_feishu.py放在同一级目录下,并在该目录中启动服务(程序从当前工作目录读取config.yml)

启动服务

python3 send_feishu.py

创建飞书告警群,获取webhook地址

1. 创建一个飞书群,将相关的人员拉到群里。

2. 添加群机器人

在群设置中找到“群机器人”,添加自定义机器人。

记录飞书机器人的Webhook 地址

配置联络点

image

image

url示例:

http://192.168.10.76:8081/webhook/feishu/?webhook=https://open.feishu.cn/open-apis/bot/v2/hook/7b68dce5-4263-4438-a586-caf6053e083

配置Grafana内置Alertmanager

image

image

配置监控告警大屏

image

配置告警规则

image

image

注意:确保查询结果在表达式C上(webhook服务只读取values中键为C的值作为“当前状态值”),否则告警内容无法拿到最新实时结果

配置成功后

image

告警通知

告警通知样例截图

image

posted @ 2025-02-13 14:21  Tenderness、  阅读(1153)  评论(10)    收藏  举报