使用企业微信的消息推送来发送告警

实现 Prometheus 的 Alertmanager 与企业微信集成，让 Prometheus 触发的告警能够自动推送到企业微信的群聊/机器人中。

先创建企业微信机器人，复制机器人的 Webhook URL（格式类似：https://qyapi.weixin.qq.com/cgi-bin/webhook/send?key=xxxx-xxxx-xxxxx-xxxx-xxxx），把机器人拉入群聊。

假设企业微信的消息推送 webhook 是 https://qyapi.weixin.qq.com/cgi-bin/webhook/send?key=xxxx-xxxx-xxxxx-xxxx-xxxx，对该地址发起 HTTP POST 请求，即可实现给该群组发送消息：

curl 'https://qyapi.weixin.qq.com/cgi-bin/webhook/send?key=xxxx-xxxx-xxxxx-xxxx-xxxx' \
   -H 'Content-Type: application/json' \
   -d '
   {
    	"msgtype": "text",
    	"text": {
        	"content": "hello world"
    	}
   }'

msgtype：必填字段。中转端点生成的消息体中必须包含该字段，否则企业微信无法推送。常用的两种类型：

text：文本类型。

{
    "msgtype": "text",
    "text": {
        "content": "广州今日天气：29度，大部分多云，降雨概率：60%",
        "mentioned_list":["wangqing","@all"],
        "mentioned_mobile_list":["13800001111","@all"]
    }
}

markdown：markdown 类型。

{
    "msgtype": "markdown",
    "markdown": {
        "content": "实时新增用户反馈<font color=\"warning\">132例</font>，请相关同事注意。\n>类型:<font color=\"comment\">用户反馈</font>\n>普通用户反馈:<font color=\"comment\">117例</font>\n>VIP用户反馈:<font color=\"comment\">15例</font>"
    }
}

先手动测试 markdown 类型的消息，看企业微信是否能正常收到消息：

curl 'https://qyapi.weixin.qq.com/cgi-bin/webhook/send?key=xxxx-xxxx-xxxx' \
  -H 'Content-Type: application/json' \
  -d '{
    "msgtype": "markdown",
    "markdown": {
      "content": "### 测试告警\n> 这是一条测试消息"
    }
  }'

为什么要用 webhook 中转？因为 Alertmanager 发往 webhook 的 JSON 格式是固定的（见下文），与企业微信机器人要求的消息结构不一致，无法直接对接。中转端的作用是把告警数据用模板渲染后，重新封装成严格匹配企业微信机器人要求的消息结构，要确保：

企业微信机器人只接收特定格式的 JSON 数据，上面的 markdown 类型是告警场景最常用的（支持排版、高亮），渲染后的 JSON 格式要符合企业微信的规范。
content 字段要符合企业微信 markdown 的语法，要处理好一些特殊字符。

Alertmanager 以 JSON 格式向配置的 webhook 端点发送 HTTP POST 请求，固定格式：

{
  "version": "4",
  "groupKey": <string>,              // key identifying the group of alerts (e.g. to deduplicate)
  "truncatedAlerts": <int>,          // how many alerts have been truncated due to "max_alerts"
  "status": "<resolved|firing>",
  "receiver": <string>,
  "groupLabels": <object>,
  "commonLabels": <object>,
  "commonAnnotations": <object>,
  "externalURL": <string>,           // backlink to the Alertmanager.
  "alerts": [
    {
      "status": "<resolved|firing>",
      "labels": <object>,
      "annotations": <object>,
      "startsAt": "<rfc3339>",
      "endsAt": "<rfc3339>",
      "generatorURL": <string>,      // identifies the entity that caused the alert
      "fingerprint": <string>        // fingerprint to identify the alert
    },
    ...
  ]
}

流程：

Alertmanager（固定的原始 JSON 格式） -> 中转端点（打包成企业微信机器人要求的格式） -> 企业微信机器人

下面用 Go 来实现这个中转端，也可以用 Python 等其它语言实现。

alertmanager.yml 中配置：

...
# Template files
templates:
  - '/etc/alertmanager/templates/*.tmpl'

# Receivers: define how alerts are delivered (email, webhook, ...)
receivers:
  ...
  # WeChat Work receiver (delivered through a webhook)
  - name: 'wechat'
    webhook_configs:
      # The Go relay endpoint
      - url: 'http://10.0.0.12:5000/wechat'
        send_resolved: true  # also send a notification when the alert is resolved
        # NOTE(review): a per-webhook timeout is presumably only accepted by
        # recent Alertmanager releases — confirm against the deployed version.
        timeout: 15s

golang-wechat/main.go：

package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"log"
	"net/http"
	"os"
	"text/template"
	"time"
)

// AlertmanagerData mirrors the JSON document that Alertmanager POSTs to a
// webhook receiver. The decoded value is also handed to the message template
// as its data, so every exported field here is reachable from the template
// (for example .Alerts or .CommonLabels).
//
// GroupKey identifies the group of alerts the notification belongs to and
// TruncatedAlerts counts the alerts left out of Alerts because of the
// "max_alerts" limit; both are part of the webhook payload.
type AlertmanagerData struct {
	Receiver          string            `json:"receiver"`
	Status            string            `json:"status"`
	Alerts            []Alert           `json:"alerts"`
	GroupLabels       map[string]string `json:"groupLabels"`
	CommonLabels      map[string]string `json:"commonLabels"`
	CommonAnnotations map[string]string `json:"commonAnnotations"`
	ExternalURL       string            `json:"externalURL"`
	Version           string            `json:"version"`
	GroupKey          string            `json:"groupKey"`
	TruncatedAlerts   int               `json:"truncatedAlerts"`
}

// Alert is one entry of the "alerts" array in the Alertmanager webhook
// payload. StartsAt and EndsAt are decoded from RFC 3339 timestamps, and
// GeneratorURL carries the payload's "generatorURL" link to the entity that
// caused the alert, so templates can reference it.
type Alert struct {
	Status       string            `json:"status"`
	Labels       map[string]string `json:"labels"`
	Annotations  map[string]string `json:"annotations"`
	StartsAt     time.Time         `json:"startsAt"`
	EndsAt       time.Time         `json:"endsAt"`
	GeneratorURL string            `json:"generatorURL"`
	Fingerprint  string            `json:"fingerprint"`
}

// wechatMarkdown is the "markdown" object of a robot message; Content holds
// the rendered markdown text.
type wechatMarkdown struct {
	Content string `json:"content"`
}

// WechatMessage is the JSON body posted to the WeChat Work robot webhook.
// The rendered template output is placed in Markdown.Content.
type WechatMessage struct {
	MsgType  string         `json:"msgtype"`
	Markdown wechatMarkdown `json:"markdown"`
}

// wechatWebhook is the WeChat Work robot webhook URL that rendered alerts are
// posted to. main fills it in from the WECHAT_WEBHOOK_URL environment variable.
var wechatWebhook string

// tpl is the parsed message template shared by the request handlers. main
// loads it once at startup.
var tpl *template.Template

// main reads the configuration from the environment, loads the message
// template, registers the HTTP routes and serves until the listener fails.
//
// Environment variables:
//   - WECHAT_WEBHOOK_URL: robot webhook URL (required).
//   - PORT: listen port, "5000" when unset.
//   - TEMPLATE_PATH: template file, "/app/templates/wechat.tmpl" when unset.
//
// Every startup problem is fatal so a misconfigured relay never appears
// healthy while being unable to deliver alerts.
func main() {
	// Read the configuration from the environment.
	wechatWebhook = os.Getenv("WECHAT_WEBHOOK_URL")
	port := os.Getenv("PORT")
	tplPath := os.Getenv("TEMPLATE_PATH")

	if port == "" {
		port = "5000" // default port
	}
	if wechatWebhook == "" {
		log.Fatal("请设置环境变量 WECHAT_WEBHOOK_URL")
	}
	if tplPath == "" {
		tplPath = "/app/templates/wechat.tmpl" // default template path
	}

	// Parse the template file. It renders only the markdown text, not the
	// surrounding JSON structure.
	var err error
	tpl, err = template.ParseFiles(tplPath)
	if err != nil {
		log.Fatalf("加载模板失败: %v", err)
	}
	// The handler executes the named template "wechat.message"; fail at
	// startup when it is missing instead of on the first incoming alert.
	if tpl.Lookup("wechat.message") == nil {
		log.Fatalf("模板文件 %s 中未定义 \"wechat.message\" 模板", tplPath)
	}
	log.Printf("成功加载模板，名称: %s，路径: %s", tpl.Name(), tplPath)

	// Register the HTTP routes on the default mux.
	http.HandleFunc("/wechat", forwardHandler)
	http.HandleFunc("/health", healthHandler)

	// Serve with explicit timeouts so slow or stalled clients cannot hold
	// connections open indefinitely. WriteTimeout is kept generous because
	// the handler waits for the upstream webhook before it replies.
	srv := &http.Server{
		Addr:              ":" + port,
		ReadHeaderTimeout: 5 * time.Second,
		ReadTimeout:       15 * time.Second,
		WriteTimeout:      30 * time.Second,
		IdleTimeout:       60 * time.Second,
	}
	log.Printf("服务启动，监听端口: %s", port)
	log.Fatal(srv.ListenAndServe())
}

const (
	// maxAlertBodyBytes caps how many bytes of an incoming request body are read.
	maxAlertBodyBytes = 4 << 20

	// maxWechatMarkdownBytes is the size limit applied to the markdown content
	// before forwarding; the robot API documents a 4096-byte UTF-8 maximum.
	maxWechatMarkdownBytes = 4096

	// wechatTruncationNotice is appended to content that had to be shortened.
	wechatTruncationNotice = "\n...(内容过长，已截断)"
)

// wechatClient is shared by all requests and carries a timeout so a stalled
// upstream cannot block a handler forever.
var wechatClient = &http.Client{Timeout: 10 * time.Second}

// wechatAPIResult is the JSON envelope returned by the robot webhook. The API
// reports failures through a non-zero errcode.
type wechatAPIResult struct {
	ErrCode int    `json:"errcode"`
	ErrMsg  string `json:"errmsg"`
}

// truncateUTF8 returns s unchanged when it is at most limit bytes long.
// Otherwise it cuts s on a rune boundary and appends notice, so the result is
// never longer than limit bytes and never ends in a partial character.
func truncateUTF8(s string, limit int, notice string) string {
	if len(s) <= limit {
		return s
	}
	if limit <= 0 {
		return ""
	}
	cut := limit - len(notice)
	if cut < 0 {
		// The notice alone does not fit; drop it and keep as much text as possible.
		cut, notice = limit, ""
	}
	// s[cut] is the first byte that would be dropped. While it is a UTF-8
	// continuation byte (10xxxxxx) the cut falls inside a character, so step
	// back to that character's first byte.
	for cut > 0 && s[cut]&0xC0 == 0x80 {
		cut--
	}
	return s[:cut] + notice
}

// forwardHandler receives an Alertmanager webhook notification, renders the
// "wechat.message" template with it and posts the result to the WeChat Work
// robot webhook as a markdown message.
//
// Responses: 405 for non-POST requests, 400 when the body is not valid JSON,
// 500 when rendering, encoding or forwarding fails (including an upstream
// non-200 status or non-zero errcode), and 200 with {"status": "success"}
// once the robot API has accepted the message.
func forwardHandler(w http.ResponseWriter, r *http.Request) {
	// Alertmanager delivers notifications with POST; reject anything else so
	// stray requests never produce a chat message.
	if r.Method != http.MethodPost {
		w.Header().Set("Allow", http.MethodPost)
		http.Error(w, "仅支持 POST 请求", http.StatusMethodNotAllowed)
		return
	}

	// Decode the Alertmanager payload, reading at most maxAlertBodyBytes.
	var alertData AlertmanagerData
	body := http.MaxBytesReader(w, r.Body, maxAlertBodyBytes)
	if err := json.NewDecoder(body).Decode(&alertData); err != nil {
		http.Error(w, "解析请求失败: "+err.Error(), http.StatusBadRequest)
		log.Printf("解析错误: %v", err)
		return
	}
	log.Printf("收到告警数据: %+v", alertData)

	// Render only the text of markdown.content; the JSON envelope is built
	// below. The template file uses a named template
	// ({{ define "wechat.message" }}), hence ExecuteTemplate rather than Execute.
	var contentBuf bytes.Buffer
	if err := tpl.ExecuteTemplate(&contentBuf, "wechat.message", alertData); err != nil {
		http.Error(w, "模板渲染失败: "+err.Error(), http.StatusInternalServerError)
		log.Printf("模板渲染错误: %v", err)
		return
	}
	rawContent := contentBuf.String()
	log.Printf("渲染后的 content 原始内容: %s", rawContent)

	// Oversized content would be rejected upstream, so send a shortened
	// message rather than losing the notification.
	content := truncateUTF8(rawContent, maxWechatMarkdownBytes, wechatTruncationNotice)
	if len(content) != len(rawContent) {
		log.Printf("content 超过 %d 字节，已截断", maxWechatMarkdownBytes)
	}

	// Build the message; json.Marshal takes care of escaping special characters.
	var wechatMsg WechatMessage
	wechatMsg.MsgType = "markdown"
	wechatMsg.Markdown.Content = content

	jsonData, err := json.Marshal(wechatMsg)
	if err != nil {
		http.Error(w, "JSON 序列化失败: "+err.Error(), http.StatusInternalServerError)
		log.Printf("JSON 序列化错误: %v", err)
		return
	}
	log.Printf("最终发送的 JSON: %s", jsonData)

	// Forward to WeChat Work.
	// NOTE(review): the client error text includes the request URL, and the
	// webhook URL carries the robot key in its query string — consider
	// redacting it before logging or echoing it to the caller.
	resp, err := wechatClient.Post(wechatWebhook, "application/json", bytes.NewReader(jsonData))
	if err != nil {
		http.Error(w, "转发失败: "+err.Error(), http.StatusInternalServerError)
		log.Printf("转发错误: %v", err)
		return
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		http.Error(w, fmt.Sprintf("企业微信接口错误，状态码: %d", resp.StatusCode), http.StatusInternalServerError)
		log.Printf("企业微信接口错误，状态码: %d", resp.StatusCode)
		return
	}

	// HTTP 200 alone does not mean the message was accepted: the result is in
	// the errcode field of the JSON reply. An unreadable reply keeps the
	// previous behavior (treated as delivered) and is only logged.
	var result wechatAPIResult
	if err := json.NewDecoder(resp.Body).Decode(&result); err != nil {
		log.Printf("解析企业微信响应失败: %v", err)
	} else if result.ErrCode != 0 {
		msg := fmt.Sprintf("企业微信接口返回错误，errcode: %d，errmsg: %s", result.ErrCode, result.ErrMsg)
		http.Error(w, msg, http.StatusInternalServerError)
		log.Print(msg)
		return
	}

	// Report success to Alertmanager.
	w.Header().Set("Content-Type", "application/json")
	w.WriteHeader(http.StatusOK)
	if _, err := w.Write([]byte(`{"status": "success"}`)); err != nil {
		log.Printf("写入响应失败: %v", err)
	}
}

// healthHandler is the health-check endpoint. It always answers 200 with a
// small JSON document and declares the body as JSON; the request itself is
// not inspected.
func healthHandler(w http.ResponseWriter, _ *http.Request) {
	w.Header().Set("Content-Type", "application/json")
	w.WriteHeader(http.StatusOK)
	// A failed write only means the prober went away; there is nothing to recover.
	_, _ = w.Write([]byte(`{"status": "healthy"}`))
}

匹配上面 golang 中转程序的模板 templates/wechat.tmpl：

{{ define "wechat.message" }}
{{- /*
  Renders one markdown section per alert in .Alerts, separated by "---".
  Missing labels and annotations fall back to placeholder text. "无详情" is
  printed only when neither the message nor the description annotation is set.
  Times are shifted by a fixed 8 hours (28800e9 ns) before formatting.
*/ -}}

{{- range $index, $alert := .Alerts -}}
{{- if gt $index 0 }}
---
{{ end }}

{{- if eq $alert.Status "firing" }}
### 🚨 监控报警（故障告警通知）
{{- else }}
### ✅ 监控报警（恢复通知）
{{- end }}
- **告警类型**: {{ if $alert.Labels.alertname }}{{ $alert.Labels.alertname }}{{ else }}未知告警{{ end }}
- **告警级别**: {{ if $alert.Labels.severity }}{{ $alert.Labels.severity }}{{ else }}未知级别{{ end }}
- **告警状态**: {{ $alert.Status }} {{ if eq $alert.Status "firing" }}故障{{ else }}恢复{{ end }}
- **故障主机**: {{ if $alert.Labels.instance }}{{ $alert.Labels.instance }}{{ else }}-{{ end }} {{ if $alert.Labels.device }}{{ $alert.Labels.device }}{{ else }}-{{ end }}
- **服务环境**: {{ if $alert.Labels.env }}{{ $alert.Labels.env }}{{ else }}未知环境{{ end }}
- **服务名称**: {{ if $alert.Labels.servicename }}{{ $alert.Labels.servicename }}{{ else }}未知服务{{ end }}
- **告警主题**: {{ if $alert.Annotations.summary }}{{ $alert.Annotations.summary }}{{ else }}无主题{{ end }}
- **告警详情**: {{ if $alert.Annotations.message }}{{ $alert.Annotations.message }}{{ end }}{{ if and $alert.Annotations.message $alert.Annotations.description }}；{{ end }}{{ if $alert.Annotations.description }}{{ $alert.Annotations.description }}{{ else if not $alert.Annotations.message }}无详情{{ end }}
{{- if $alert.Annotations.value }}
- **触发阈值**: {{ $alert.Annotations.value }}
{{- end }}
- **故障时间**: {{ ($alert.StartsAt.Add 28800e9).Format "2006-01-02 15:04:05" }}
{{- if eq $alert.Status "resolved" }}
- **恢复时间**: {{ if not $alert.EndsAt.IsZero }}{{ ($alert.EndsAt.Add 28800e9).Format "2006-01-02 15:04:05" }}{{ else }}持续中{{ end }}
{{- end }}

{{- end }}

{{ end }}

我的 Alertmanager 是用 docker 容器的形式运行的，进入容器中手动触发告警：

# docker exec -it alertmanager sh
amtool alert add \
  --alertmanager.url=http://localhost:9093 \
  --annotation summary="测试告警" \
  --annotation description="通过 amtool 发送的测试" \
  alertname=TestAlert \
  severity=critical \
  instance=test-server

# 生成测试数据（保存为 test_alert.json）
cat > test_alert.json << EOF
{
  "version": "4",
  "status": "firing",
  "alerts": [
    {
      "status": "firing",
      "labels": {
        "alertname": "HostOutOfMemory",
        "severity": "warning",
        "instance": "ubuntu-test-10-0-0-12",
        "hostname": "test-host",
        "env": "prod",
        "servicename": "node-exporter"
      },
      "annotations": {
        "summary": "内存不足告警",
        "description": "内存使用率超过90%"
      },
      "startsAt": "2025-08-09T08:00:00Z"
    }
  ]
}
EOF

# 用 amtool 测试模板渲染
amtool template render \
  --template.glob=/etc/alertmanager/templates/wechat.tmpl \
  --template.data=test_alert.json \
  --template.text='{{ template "wechat.message" . }}'

golang 中转端也是用 docker 容器的形式运行的，一个简单的 golang-wechat/Dockerfile：

# Build stage: compile main.go inside the Go toolchain image.
FROM golang:1.23-alpine AS builder

WORKDIR /app

COPY main.go .

# Disable CGO (static build, avoids depending on system libraries)
RUN CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build -ldflags="-s -w" -o mywechat main.go

# Runtime stage: only the compiled binary is copied in. The message template
# is not part of the image and has to be provided under /app/templates at run time.
FROM alpine:3.20
WORKDIR /app
COPY --from=builder /app/mywechat .

# 5000 is the program's default listen port.
EXPOSE 5000

ENTRYPOINT ["./mywechat"]

构建、启动：

docker build -t mywechat:v1 .

# Remove the previous container (these two commands report an error on the
# very first run, when no such container exists yet; that is harmless).
docker stop mywechat
docker rm mywechat
# Start the relay. The template directory is bind-mounted read-only using an
# absolute host path: a bare name such as "templates" would be treated as a
# named volume, which starts out empty, and the program would exit because the
# template file cannot be loaded.
# Assumes the command is run from the directory that contains templates/.
docker run -d \
-v /etc/localtime:/etc/localtime:ro \
--user 1026:1026 \
--name mywechat \
-p 5000:5000 \
-e WECHAT_WEBHOOK_URL="https://qyapi.weixin.qq.com/cgi-bin/webhook/send?key=xxxx-xxxx-xxxxx-xxxx-xxxx" \
-v "$(pwd)/templates":/app/templates:ro \
mywechat:v1

接下来就可以手动触发一些告警来验证告警规则的处理了。

posted @ 2026-01-17 15:44 何达维阅读(3) 评论(0) 收藏举报

刷新页面返回顶部

daveyhe

使用企业微信的消息推送来发送告警

使用企业微信的消息推送来发送告警

公告