Django开发告警接口(webhook)对接Alertmanager告警

告警日志统计

告警去重统计

告警人分组邮件

原理图

此项目主要使用Django开发告警接口,对接Alertmanager告警,实现告警人分组、邮件通知等

告警信息日志,告警信息统计等等

开发过程

model

from django.db import models


# Create your models here.
class alerts(models.Model):
    """Log row for a single alert notification.

    The webhook view fills one instance per entry of the incoming
    ``alerts`` list and saves it; no de-duplication happens for this table.
    """

    # The first positional argument of each field is its verbose_name
    # (the label shown in the admin); English meaning is given on the right.
    startsAt = models.DateTimeField('告警产生时间')  # alert start time
    endsAt = models.DateTimeField('告警恢复时间')  # alert recovery time
    instance = models.CharField('实例', max_length=50, blank=True)  # instance
    alertname = models.CharField('告警名称', max_length=100)  # alert name
    status = models.CharField('状态', max_length=20, blank=True)  # status
    severity = models.CharField('告警级别', max_length=20, blank=True)  # severity
    message = models.CharField('告警信息', max_length=1000, blank=True)  # alert message
    known = models.BooleanField('知悉', default=False)  # acknowledged flag
    memo = models.CharField('知悉备注', max_length=50, blank=True)  # acknowledgement memo

    class Meta:
        db_table = 'alerts'
        verbose_name = verbose_name_plural = '告警日志'  # "alert log"
        # ordering = ['-startsAt']  # newest alert first (left disabled)

    def __str__(self):
        """Display the row by its alert message."""
        return self.message


class production(models.Model):
    """De-duplicated alert row used for statistics.

    The webhook view only inserts a row here when no existing row has the
    same ``startsAt``, ``endsAt`` and ``instance``.
    """

    # The first positional argument of each field is its verbose_name
    # (the label shown in the admin); English meaning is given on the right.
    startsAt = models.DateTimeField('告警产生时间')  # alert start time
    endsAt = models.DateTimeField('告警恢复时间')  # alert recovery time
    instance = models.CharField('实例', max_length=50, blank=True)  # instance
    alertname = models.CharField('告警名称', max_length=100)  # alert name
    status = models.CharField('状态', max_length=20, blank=True)  # status
    severity = models.CharField('告警级别', max_length=20, blank=True)  # severity
    message = models.CharField('告警信息', max_length=1000, blank=True)  # alert message
    known = models.BooleanField('知悉', default=False)  # acknowledged flag

    # memo = models.CharField(max_length=50, verbose_name='知悉备注', blank=True)

    class Meta:
        db_table = 'production'
        verbose_name = verbose_name_plural = '告警统计'  # "alert statistics"

    def __str__(self):
        """Display the row by its alert message."""
        return self.message


class alarmuser(models.Model):
    """Alert recipient: a person, their e-mail address and the group they belong to.

    ``get_email`` in the view module selects recipients by ``group`` and
    reads ``useremail`` from the matching rows.
    """

    # verbose_name values are the admin labels: recipient / alert e-mail / group.
    username = models.CharField(max_length=20, verbose_name='告警人', blank=True)
    # The previous limit of 20 characters rejected most real addresses;
    # 254 is Django's own default for EmailField.
    # NOTE: widening the column requires a schema migration.
    useremail = models.EmailField(max_length=254, verbose_name='告警邮件', blank=True)
    group = models.CharField(max_length=20, verbose_name='分组', blank=True)

    def __str__(self):
        """Display the row by the recipient's name."""
        return self.username

    class Meta:
        db_table = 'alarmuser'
        verbose_name = '告警人配置'  # "alert recipient configuration"
        verbose_name_plural = verbose_name

注册xadmin后台

import xadmin

from django.contrib import admin

from xadmin import views
from webhook.models import alerts, production, alarmuser


class GlobalSetting:
    """Site-wide xadmin options, registered against ``views.CommAdminView`` below.

    NOTE(review): xadmin examples usually register ``enable_themes`` /
    ``use_bootswatch`` on ``views.BaseAdminView`` and set ``refresh_times``
    on a model admin class -- confirm these three take effect from here.
    """
    site_title = "长风破浪"
    site_footer = "长风破浪"
    menu_style = "accordion"  # sets the menu style
    enable_themes = True
    use_bootswatch = True
    refresh_times = [5, 10, 30, 60]


# Attach the options above to xadmin's common admin view.
xadmin.site.register(views.CommAdminView, GlobalSetting)


class AlertsAdmin:
    """xadmin options class registered for the ``alerts`` model."""

    # Site title, footer and collapsible ("accordion") menu style.
    site_title = "长风破浪"
    site_footer = "长风破浪"
    menu_style = "accordion"

    # Columns shown in the list view.
    list_display = (
        'startsAt',
        'endsAt',
        'instance',
        'alertname',
        'status',
        'severity',
        'message',
        'known',
        'memo',
    )

    # Filter and search-box fields.
    list_filter = [
        'status',
        'severity',
        'startsAt',
        'endsAt',
    ]
    search_fields = [
        'instance',
        'alertname',
    ]

    # Rows per page (the original comment states the default is 100).
    list_per_page = 50


class ProductionAdmin:
    """xadmin options class registered for the ``production`` model."""

    # Collapsible ("accordion") menu style.
    menu_style = "accordion"

    # Columns shown in the list view.
    list_display = (
        'startsAt',
        'endsAt',
        'instance',
        'alertname',
        'status',
        'severity',
        'message',
        'known',
    )

    # Filter and search-box fields.
    list_filter = [
        'status',
        'severity',
        'startsAt',
        'endsAt',
    ]
    search_fields = [
        'instance',
        'alertname',
    ]

    # Rows per page (the original comment states the default is 100).
    list_per_page = 50


class AlarmuserAdmin:
    """xadmin options class registered for the ``alarmuser`` model."""

    # Columns shown in the list view.
    list_display = (
        'username',
        'useremail',
        'group',
    )


# Register each model with its xadmin options class.
xadmin.site.register(alerts, AlertsAdmin)
xadmin.site.register(production, ProductionAdmin)
xadmin.site.register(alarmuser, AlarmuserAdmin)

view视图

import json
import smtplib
from email.mime.text import MIMEText

import yaml
from dateutil import parser
from django.http import HttpResponse
from webhook.models import alerts as alerts_t, alarmuser
from webhook.models import production
import datetime
from jinja2 import Environment, FileSystemLoader


# Renders an HTML template found in the "templates" directory.
class ParseingTemplate:
    """Render a named Jinja2 template loaded from the ``templates`` directory."""

    def __init__(self, templatefile):
        """Remember the template file name to render later."""
        self.templatefile = templatefile

    def template(self, **kwargs):
        """Render the template with ``kwargs`` as its context and return the text.

        Any error raised while loading or rendering propagates to the caller.
        """
        # 'templates' is a relative path, so it is resolved against the
        # process's current working directory.
        loader = FileSystemLoader('templates')
        environment = Environment(loader=loader)
        loaded = environment.get_template(self.templatefile)
        return loaded.render(kwargs)


# Time-zone conversion (UTC -> UTC+8).
def time_zone_conversion(utctime):
    """Convert a timestamp string to UTC+8 and return it as text.

    :param utctime: timestamp string understood by ``dateutil.parser.parse``.
        A value without an offset is treated as UTC.
    :return: ``'YYYY-MM-DD HH:MM:SS'`` string, eight hours ahead of UTC,
        with sub-second precision dropped.
    """
    parsed = parser.parse(utctime)
    if parsed.tzinfo is not None:
        # The old strftime/strptime round trip simply discarded the offset,
        # so an input such as "...+08:00" was shifted a second time.
        # Normalise to UTC first so the fixed +8h below is always correct.
        parsed = parsed.astimezone(datetime.timezone.utc)
    naive_utc = parsed.replace(tzinfo=None, microsecond=0)
    return str(naive_utc + datetime.timedelta(hours=8))


# Look up alert recipients' e-mail addresses or group names.
def get_email(email_name=None, action=0):
    """Query the ``alarmuser`` table for recipient data.

    :param email_name: group name whose members should receive the mail
        (only used when ``action`` is 0).
    :param action: 0 -> list of e-mail addresses of the users in group
        ``email_name``; 1 -> list of distinct group names.
    :return: a list for actions 0 and 1, ``None`` for any other action.
    """
    if action == 0:
        # useremail may be blank in the model; an empty string is not a
        # usable recipient, so leave it out.
        email_list = [
            user.useremail
            for user in alarmuser.objects.filter(group=email_name)
            if user.useremail
        ]
        print('显示邮件', email_list)
        return email_list
    if action == 1:
        group_list = [
            row['group']
            for row in alarmuser.objects.values("group").distinct()
        ]
        print('显示组:', group_list)
        # This branch previously fell off the end and returned None.
        return group_list
    return None


# Read e-mail settings from the YAML configuration file.
def get_email_conf(file, email_name=None, action=0):
    """Load ``file`` as YAML and return the part selected by ``action``.

    :param file: path of the YAML configuration file.
    :param email_name: name of the recipient list (only used for action 0).
    :param action: 0 -> ``receive_addr`` of the ``email`` entry whose
        ``name`` equals ``email_name`` (``None`` if nothing matches);
        1 -> list of all ``name`` values under ``email``;
        2 -> the ``send`` section (sender account settings).
    :return: depends on ``action`` as described above.

    A missing key terminates the process via ``exit(-1)`` and a missing
    file via ``exit(-2)``; every other error propagates unchanged.
    """
    try:
        with open(file, 'r', encoding='utf-8') as conf_file:
            conf = yaml.safe_load(conf_file)
        if action == 2:
            return conf['send']
        if action == 1:
            return [entry['name'] for entry in conf['email']]
        if action == 0:
            for entry in conf['email']:
                if entry['name'] == email_name:
                    return entry['receive_addr']
                print("%s does not match for %s" % (email_name, file))
            # Only reached when no entry matched.
            print("No recipient address configured")
    except KeyError:
        print("%s not exist" % email_name)
        exit(-1)
    except FileNotFoundError:
        print("%s file not found" % file)
        exit(-2)


# Send an HTML e-mail through the configured SMTP account.
def sendEmail(title, content, receivers=None):
    """Send ``content`` as an HTML mail with subject ``title``.

    :param title: mail subject.
    :param content: HTML body.
    :param receivers: list of recipient addresses; a built-in fallback
        address is used when ``None``.
    :return: ``None``. SMTP errors are printed, not raised.

    The sender account is read from the ``send`` section of ``email.yaml``
    (keys ``smtp_host``, ``send_user``, ``send_pass``, ``send_addr``).
    """
    if receivers is None:
        receivers = ['chenf-o@glodon.com']
    send_dict = get_email_conf('email.yaml', action=2)
    mail_host = send_dict['smtp_host']
    mail_user = send_dict['send_user']
    mail_pass = send_dict['send_pass']
    sender = send_dict['send_addr']
    # The password is deliberately left out of the debug output.
    print(mail_host, mail_user, sender)
    msg = MIMEText(content, 'html', 'utf-8')
    msg['From'] = "{}".format(sender)
    msg['To'] = ",".join(receivers)
    print(receivers)
    print(msg['To'])
    msg['Subject'] = title
    try:
        # The context manager sends QUIT and closes the socket even when
        # login or sendmail fails, so the connection is no longer leaked.
        with smtplib.SMTP_SSL(mail_host, 465) as smtp_conn:
            smtp_conn.login(mail_user, mail_pass)
            smtp_conn.sendmail(sender, receivers, msg.as_string())
        print('mail send successful.')
    except smtplib.SMTPException as e:
        print(e)


# Save the alerts first, then send the e-mail.

# An ``endsAt`` containing this marker is not converted; the fixed
# placeholder below is stored instead.
# NOTE(review): presumably this is the zero timestamp sent for alerts that
# have not ended yet -- confirm against the sender.
_UNSET_ENDS_AT_MARKER = '0001-01-01'
_UNSET_ENDS_AT_STORED = '0001-01-01 00:00:00'


def _alert_message(annotations):
    """Pick the alert text: ``message``, else ``summary``, else ``'null'``."""
    message = annotations.get('message', 'null')
    if message == 'null' and 'summary' in annotations:
        message = annotations['summary']
    return message


def _populate_alert_record(record, alert, message, instance, starts_at, ends_at):
    """Copy one alert's data onto a model instance and return that instance."""
    record.startsAt = starts_at
    record.endsAt = ends_at
    record.instance = instance
    record.alertname = alert['labels']['alertname']
    if alert['status'] == 'firing':
        record.status = '告警中'
    elif alert['status'] == 'resolved':
        record.status = '已恢复'
        # A resolved alert is stored as already acknowledged.
        record.known = True
    record.severity = alert['labels']['severity']
    record.message = message
    return record


def webhook(request):
    """Receive an alert notification, store it and mail the responsible group.

    For a POST request the JSON body is expected to contain an ``alerts``
    list and a ``commonLabels`` mapping with ``team`` and ``alertname``.
    Every alert is saved to ``alerts``; it is also saved to ``production``
    unless a row with the same start time, end time and instance already
    exists. Afterwards the payload's timestamps are converted to UTC+8, the
    ``email_template_firing.html`` template is rendered with the payload,
    and the result is mailed to the users of group ``commonLabels.team``.

    :param request: Django request object.
    :return: ``HttpResponse``. GET gets a refusal text, POST gets ``1``,
        any other method gets status 405 (previously the view returned
        ``None`` for those).
    :raises Exception: any error during POST handling is printed and
        re-raised (for example ``KeyError`` for a missing payload field).

    NOTE(review): a POST from an external sender carries no CSRF token --
    confirm the URL configuration exempts this view.
    """
    if request.method == "GET":
        return HttpResponse('禁止get')
    if request.method != 'POST':
        return HttpResponse(status=405)
    try:
        raw_body = request.body
        print(raw_body.decode())
        # Parsed once; the records below are written before the timestamps
        # in this structure are rewritten for the e-mail template.
        prometheus_data = json.loads(raw_body.decode('utf-8'))

        for alert in prometheus_data['alerts']:
            labels = alert['labels']
            msg = _alert_message(alert['annotations'])
            print(alert['startsAt'][0:19] + alert['status'] + " :" + msg)
            if msg == 'null':
                print(alert)
            instance = labels.get('instance', 'unknown')

            print(instance + " --- " + labels['alertname'] + " ---- " + labels['severity'])
            print(alert['endsAt'])

            starts_at = time_zone_conversion(alert['startsAt'])
            end_is_unset = _UNSET_ENDS_AT_MARKER in alert['endsAt']
            print(end_is_unset)
            if end_is_unset:
                ends_at = _UNSET_ENDS_AT_STORED
            else:
                ends_at = time_zone_conversion(alert['endsAt'])
            print(ends_at)

            # Every notification is logged.
            log_entry = _populate_alert_record(
                alerts_t(), alert, msg, instance, starts_at, ends_at
            )
            log_entry.save()

            # The statistics table only gets alerts it has not seen yet.
            already_counted = production.objects.filter(
                startsAt=starts_at, endsAt=ends_at, instance=instance
            )
            print(already_counted)
            if not already_counted.exists():
                _populate_alert_record(
                    production(), alert, msg, instance, starts_at, ends_at
                ).save()
            print('显示a:', log_entry)

        # Convert the payload's timestamps to UTC+8 for the e-mail template.
        for alert in prometheus_data['alerts']:
            alert['startsAt'] = time_zone_conversion(alert['startsAt'])
            if alert['status'] != 'firing':
                alert['endsAt'] = time_zone_conversion(alert['endsAt'])
        print(prometheus_data)

        team_name = prometheus_data["commonLabels"]["team"]
        print(team_name)
        html_template_content = ParseingTemplate('email_template_firing.html').template(
            prometheus_monitor_info=prometheus_data
        )
        # Recipient addresses of the responsible team.
        email_list = get_email(email_name=team_name, action=0)
        print(email_list)
        print(prometheus_data['commonLabels']['alertname'])
        sendEmail(
            prometheus_data['commonLabels']['alertname'],
            html_template_content,
            receivers=email_list
        )
        return HttpResponse(1)
    except Exception as e:
        print(e)
        raise

需要完整代码,请留言

稍后整理完毕传到github上

posted @ 2022-05-21 18:49  南宫乘风  阅读(227)  评论(3编辑  收藏  举报