Grafana v11.1.3 配置告警规则并配置飞书告警
流程
- 启动webhook服务
- 创建飞书告警群
- 添加自定义机器人
- 配置联络点
- 配置 Grafana 内置 Alertmanager
- 配置大屏
- 配置规则
- 告警通知
编写webhook服务,并启动它
相关代码
send_feishu.py
from flask import Flask, request, jsonify
import json
import yaml
import sys
import os
# Flask application object; the webhook route below is registered on it via @app.route.
app = Flask(__name__)
import json
import datetime
import requests
from urllib.parse import urlparse, urlunparse
# utc转cst
def utc2cst(utc_time_str):
    """Convert an RFC 3339 timestamp to China Standard Time (UTC+8).

    The accepted form is ``YYYY-MM-DDTHH:MM:SS`` followed by an optional
    fractional-second part and either ``Z`` or a ``+HH:MM`` / ``-HH:MM``
    offset. Fractional seconds are dropped because the output only has
    second resolution.

    Args:
        utc_time_str: Timestamp string to convert.

    Returns:
        The moment formatted as ``YYYY-MM-DD HH:MM:SS`` in UTC+8. When the
        input is not a string or cannot be parsed (for example the
        placeholder ``0000-00-00T00:00:00Z``) it is returned unchanged.
    """
    import re  # function-scope import: the module header does not import ``re``

    if not isinstance(utc_time_str, str):
        return utc_time_str

    match = re.fullmatch(
        r"(\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2})(?:\.\d+)?(Z|[+-]\d{2}:\d{2})",
        utc_time_str,
    )
    if match is None:
        return utc_time_str

    base, zone = match.groups()
    try:
        moment = datetime.datetime.strptime(base, "%Y-%m-%dT%H:%M:%S")
        if zone != "Z":
            # Normalise an explicit offset to UTC first: UTC = local - offset.
            sign = -1 if zone[0] == "-" else 1
            offset = datetime.timedelta(hours=int(zone[1:3]), minutes=int(zone[4:6]))
            moment -= sign * offset
        cst_time = moment + datetime.timedelta(hours=8)
    except (ValueError, OverflowError):
        # Invalid calendar values or a result outside datetime's range.
        return utc_time_str
    return cst_time.strftime("%Y-%m-%d %H:%M:%S")
def alarm_label_setting(k, v):
    """Build one grey two-column row for a Feishu interactive card.

    Args:
        k: Text for the left column; rendered as bold markdown.
        v: Value for the right column; formatted into a plain-text element.

    Returns:
        A ``column_set`` element dict holding the two columns.
    """
    name_column = {
        'tag': 'column',
        'width': 'weighted',
        'weight': 1,
        'vertical_align': 'center',
        'elements': [{'tag': 'markdown', 'content': f'**{k}**'}],
    }
    value_column = {
        'tag': 'column',
        'width': 'weighted',
        'weight': 1,
        'vertical_align': 'top',
        'elements': [
            {'tag': 'div', 'text': {'content': f'{v}', 'tag': 'plain_text'}},
        ],
    }
    return {
        'tag': 'column_set',
        'flex_mode': 'none',
        'background_style': 'grey',
        'columns': [name_column, value_column],
    }
def _swap_netloc(url, grafana_host):
    """Return *url* with its host:port replaced by *grafana_host*; '' when *url* is missing."""
    if not url:
        return ''
    if not grafana_host:
        return url
    return urlunparse(urlparse(url)._replace(netloc=grafana_host))


def _link_button(text, url, style):
    """Build one Feishu card button that opens *url*."""
    return {
        "tag": "button",
        "text": {"tag": "plain_text", "content": text},
        "url": url,
        "type": style,
    }


def _card_header(resolved, level, count, alertname):
    """Pick the card colour and title from the notification status and alert level."""
    if resolved:
        return {
            "template": "green",
            "title": {"content": f"😁告警恢复-(数量:{count})-{alertname}", "tag": "plain_text"},
        }
    # level -> (colour, title prefix); any other level falls back to a plain blue header.
    styles = {
        '警告': ('yellow', '😢'),
        '灾难': ('red', '😢😢😢'),
        '严重': ('red', '😢😢'),
    }
    template, prefix = styles.get(level, ('blue', ''))
    return {
        "template": template,
        "title": {"content": f"{prefix}{level}(数量:{count})-{alertname}", "tag": "plain_text"},
    }


def _alert_rows(alert, resolved, shown_labels, unit):
    """Build the card rows for a single alert, terminated by a horizontal rule."""
    rows = []
    # Only labels selected in the configuration are displayed.
    for key, value in (alert.get('labels') or {}).items():
        if key in shown_labels:
            rows.append(alarm_label_setting(key, value))
    # Annotations are always displayed; on recovery they are prefixed and 'state' is hidden.
    for key, value in (alert.get('annotations') or {}).items():
        if not resolved:
            rows.append(alarm_label_setting(key, value))
        elif key != 'state':
            rows.append(alarm_label_setting(f"annotations.{key}", value))
    # The current value is read from expression C; 'values' may be missing or null.
    current = (alert.get('values') or {}).get('C')
    if current is not None:
        shown = round(current, 2) if isinstance(current, (int, float)) else current
        rows.append(alarm_label_setting("当前状态值", f"{shown}{unit}"))
    # Recovery notifications also show when the alert started and ended.
    if resolved:
        starts_at = utc2cst(alert.get('startsAt', '0000-00-00T00:00:00Z'))
        ends_at = utc2cst(alert.get('endsAt', '0000-00-00T00:00:00Z'))
        rows.append(alarm_label_setting("startsAt", starts_at))
        rows.append(alarm_label_setting("endsAt", ends_at))
    rows.append({"tag": "hr"})
    return rows


def feishu_alert_template(data, webhook_url, labels, grafana_host):
    """Render a Grafana webhook notification as a Feishu card and post it.

    Args:
        data: Parsed JSON body of the Grafana webhook notification. The keys
            read are ``status``, ``groupLabels``, ``commonLabels`` and
            ``alerts``. The key ``printf_today`` is written back into it.
        webhook_url: URL of the Feishu bot webhook the card is posted to.
        labels: Names of alert labels to display. The list is not modified.
        grafana_host: ``host:port`` that replaces the network location of the
            links contained in the notification and is used for the
            edit-rule link.

    Returns:
        The ``requests.Response`` of the POST to ``webhook_url``.

    Raises:
        requests.RequestException: If the request fails or times out.
    """
    from urllib.parse import quote  # function-scope import: not imported at module level

    group_labels = data.get('groupLabels') or {}
    common_labels = data.get('commonLabels') or {}
    alerts = data.get('alerts') or []
    resolved = data.get('status') == 'resolved'

    grafana_folder = group_labels.get('grafana_folder', '')
    level = group_labels.get("level", '')
    unit = common_labels.get("unit", '')
    # alertname is only in groupLabels when the policy groups by it.
    alertname = group_labels.get('alertname') or common_labels.get('alertname', '')

    printf_today = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    data['printf_today'] = printf_today

    # Work on a copy so the caller's (shared, long-lived) label list never grows.
    shown_labels = list(labels or [])
    if resolved:
        shown_labels.append('时间')

    # Link buttons use the URLs of the first alert; buttons without a target are omitted.
    first_alert = alerts[0] if alerts else {}
    edit_rule_url = ''
    if grafana_host:
        edit_rule_url = f"http://{grafana_host}/alerting/list?search={quote(str(alertname))}"
    candidates = [
        ("快速设置告警沉默", _swap_netloc(first_alert.get('silenceURL'), grafana_host), "primary"),
        ("修改告警规则", edit_rule_url, "default"),
        ("查询告警规则", _swap_netloc(first_alert.get('generatorURL'), grafana_host), "default"),
        ("查看告警大屏", _swap_netloc(first_alert.get('panelURL'), grafana_host), "default"),
    ]
    buttons = [_link_button(text, url, style) for text, url, style in candidates if url]

    summary = {
        'tag': 'column_set',
        'flex_mode': 'none',
        'background_style': 'grey',
        'columns': [
            {'tag': 'column', 'width': 'weighted', 'weight': 1, 'vertical_align': 'top', 'elements': [
                {'tag': 'markdown', 'content': f'**告警时间**\n{printf_today}'}]},
            {'tag': 'column', 'width': 'weighted', 'weight': 1, 'vertical_align': 'top', 'elements': [
                {'tag': 'markdown', 'content': f'**告警分组**\n{grafana_folder}'}]},
        ],
    }

    # Card layout: summary, rule, section title, rows of every alert, then the buttons.
    elements = [summary, {"tag": "hr"}, {"tag": "markdown", "content": "**告警项**"}]
    for alert in alerts:
        elements.extend(_alert_rows(alert, resolved, shown_labels, unit))
    if buttons:
        elements.append({"tag": "action", "actions": buttons})

    body = {
        "msg_type": "interactive",
        "card": {
            "elements": elements,
            "header": _card_header(resolved, level, len(alerts), alertname),
        },
    }
    # A timeout keeps an unreachable Feishu endpoint from blocking the worker forever.
    response = requests.request(method='POST', json=body, url=webhook_url, timeout=10)
    return response
# Load the configuration. config.yml is looked up next to this script first,
# so the service can be started from any working directory; the current
# working directory is kept as a fallback.
_config_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'config.yml')
if not os.path.exists(_config_path):
    _config_path = os.path.join(os.getcwd(), 'config.yml')
with open(_config_path, encoding='utf-8') as f:
    # An empty file parses to None; fall back to an empty mapping so .get() works.
    config = yaml.safe_load(f) or {}
@app.route('/webhook/feishu/', methods=['POST'])
def receive_data():
    """Receive a Grafana webhook notification and forward it to Feishu.

    The Feishu bot webhook URL is taken from the ``webhook`` query parameter
    and the notification from the JSON request body.

    Returns:
        A JSON response: 200 on success, 400 when the body is not a JSON
        object or the ``webhook`` parameter is missing, 502 when the message
        could not be delivered to Feishu.
    """
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({"status": "error", "message": "request body must be a JSON object"}), 400

    # NOTE(review): the target URL is supplied by the caller; consider restricting
    # it to the expected Feishu host so this endpoint cannot be used to reach
    # arbitrary addresses.
    webhook = request.args.get('webhook')
    if not webhook:
        return jsonify({"status": "error", "message": "missing 'webhook' query parameter"}), 400

    print(json.dumps(data, ensure_ascii=False))

    try:
        response = feishu_alert_template(
            data,
            webhook,
            labels=config.get('labels'),
            grafana_host=config.get('grafana_host'),
        )
    except requests.RequestException as exc:
        print('failed to send Feishu message', exc)
        return jsonify({"status": "error", "message": "failed to reach Feishu"}), 502

    print(response)
    if not response.ok:
        # Report the failure to the sender instead of claiming success.
        return jsonify({"status": "error", "message": f"Feishu returned HTTP {response.status_code}"}), 502
    return jsonify({"status": "success"})
if __name__ == "__main__":
    port = config.get('port', 8000)
    # Bind to every available network interface.
    # Debug mode is off unless config.yml sets `debug: true`: the interactive
    # debugger allows arbitrary code execution and must not be reachable from
    # the network.
    app.run(host='0.0.0.0', port=port, debug=bool(config.get('debug', False)))
config.yml
# Names of the alert labels that the webhook service shows as rows in the Feishu card.
labels:
- 'FIRM'
- 'host_type'
- 'hostname'
- 'device'
- 'ip'
- 'env'
- 'pod'
- 'topic'
- 'consumergroup'
- 'namespace'
- mountpoint
- startsAt
- endsAt
# host:port that replaces the host part of the links found in the alert payload.
grafana_host: '192.168.10.76:3000'
# Port the webhook service listens on (8000 is used when this key is absent).
port: 8081
requirements.txt
flask
requests
PyYAML
将config.yml与send_feishu.py放在同一级目录下
安装依赖(pip3 install -r requirements.txt)后启动服务
python3 send_feishu.py
创建飞书告警群,获取webhook地址
1. 创建一个飞书群,将相关的人员拉到群里。
2. 添加群机器人
在群设置中找到,群机器人设置,添加机器人。
记录飞书机器人的Webhook 地址
配置联络点


url示例:http://<webhook服务IP>:8081/webhook/feishu/?webhook=<飞书机器人的Webhook地址>(端口与 config.yml 中的 port 保持一致)
配置 Grafana 内置 Alertmanager


配置监控告警大屏

配置告警规则


注意:确保最终的查询结果位于表达式 C 上。webhook 服务只读取告警 values 中键为 C 的值作为"当前状态值",否则告警内容无法显示最新的实时结果。
配置成功后

告警通知
告警通知样例截图


浙公网安备 33010602011771号