prometheus监控k8s并发送报警
1.编辑prometheus的configmap文件
kubectl edit cm prometheus-1738826520-server
2.在 data 下添加如下内容(原文以红色字体标出新增部分,应为 alerting_rules.yml 中的告警规则)
# Prometheus server ConfigMap as opened by `kubectl edit cm`.
# alerting_rules.yml holds one alert rule on unavailable Deployment replicas;
# prometheus.yml lists /etc/config/alerting_rules.yml under rule_files.
# The snippet is abbreviated: the tail of scrape_configs is elided.
apiVersion: v1
data:
  alerting_rules.yml: |-
    groups:
      - name: deployment Monitoring
        rules:
          - alert: DeploymentReplicasUnavailable
            # The PromQL expression below can be adjusted as needed.
            expr: kube_deployment_status_replicas_unavailable > 0
            for: 10s
            labels:
              severity: critical
            annotations:
              summary: "命名空间 {{ $labels.namespace }} 中的 Deployment {{ $labels.deployment }} 存在不可用副本"
              # NOTE(review): `pod` and `phase` are presumably not labels of
              # kube_deployment_status_replicas_unavailable, in which case
              # they render as empty strings - verify against the metric.
              description: "命名空间 {{ $labels.namespace }} 中的 Deployment {{ $labels.deployment }} 当前有 {{ $value }} 个不可用副本,受影响的 Pod:{{ $labels.pod }},当前状态:{{ $labels.phase }}"
  alerts: |
    {}
  allow-snippet-annotations: "false"
  prometheus.yml: |
    global:
      evaluation_interval: 1m
      scrape_interval: 30s
      scrape_timeout: 10s
    rule_files:
      - /etc/config/recording_rules.yml
      - /etc/config/alerting_rules.yml
      - /etc/config/rules
      - /etc/config/alerts
    scrape_configs:
      - job_name: prometheus
      # ...... (remainder of the scrape configuration elided in the original)
3.编辑alertmanager的configmap
kubectl edit cm prometheus-1738826520-alertmanager
4.内容如下:
# Alertmanager ConfigMap data: the main configuration plus the e-mail body
# template that the receiver references via `template "email.html"`.
# NOTE(review): the `data:` and `alertmanager.yml:` wrapper keys are not shown
# in the original paste. They are assumed here because `email.html.tmpl` must
# be a separate ConfigMap key (a file next to the config) for the
# /etc/alertmanager/*.tmpl glob to load it - confirm against the live object.
data:
  alertmanager.yml: |
    global:
      # Short timings throughout this file look chosen for a quick demo;
      # review before production use.
      resolve_timeout: 20s
      smtp_smarthost: 'smtp.126.com:465'
      smtp_from: 'xxx@126.com'
      smtp_auth_username: 'xx@126.com'
      # Placeholder credential. NOTE(review): consider keeping the real
      # password out of the ConfigMap (e.g. a mounted Secret) - confirm what
      # the deployed Alertmanager version supports.
      smtp_auth_password: 'xxx'
      smtp_require_tls: false
    route:
      group_by: ['alertname', 'cluster', 'alertsource']
      group_wait: 30s
      group_interval: 20s
      # A firing alert is re-sent at roughly this interval.
      repeat_interval: 90s
      receiver: 'default-receiver'
    receivers:
      - name: 'default-receiver'
        email_configs:
          - to: 'xxxx@126.com'
            # Also send a notification when the alert resolves.
            send_resolved: true
            # Body is rendered from the "email.html" template defined in
            # email.html.tmpl.
            html: '{{ template "email.html" . }}'
            headers:
              # Disabled alternative with a static subject prefix:
              # Subject: "[告警] {{ .CommonLabels.alertname }} - {{ (index .Alerts 0).Annotations.summary }}"
              # Active subject switches wording on firing vs. resolved status.
              Subject: '{{ if eq .Status "firing" }}🚨 告警触发: {{ (index .Alerts 0).Annotations.summary }}{{ else }}✅ 告警恢复: {{ (index .Alerts 0).Annotations.summary }}{{ end }}'
    templates:
      - /etc/alertmanager/*.tmpl
  email.html.tmpl: |-
    {{ define "email.html" }}
    <!DOCTYPE html>
    <html>
    <body>
    <p>*状态*: {{ .Status | toUpper }}</p>
    <p>*详情*: {{ (index .Alerts 0).Annotations.description }}</p>
    <p>*触发时间*: {{ (index .Alerts 0).StartsAt | tz "Asia/Shanghai" | date "2006-01-02 15:04:05" }} (北京时间)</p>
    </body>
    </html>
    {{ end }}
5.最后报警内容如下:


浙公网安备 33010602011771号