Python 监控内存溢出

  • 背景
  1. 监控后端程序日志中是否包含 OutOfMemory:若包含,则重启容器,并通过钉钉(DingTalk)发送告警
#!/usr/bin/env python 
# -*- coding: utf-8 -*-
# @Time    : 2023/8/1 10:23
# @File    : outofmemory.py
# @Author  : zk_linux
# @Software: PyCharm
# @Description:


import readline
import time
import subprocess
import re
import logging
import os
from collections import deque
from temp import ail_the_alarm


# basicConfig(filename=...) opens the log file immediately and does not create
# missing parent directories, so make sure ./log exists first; otherwise the
# script dies with FileNotFoundError before monitoring starts.
os.makedirs('./log', exist_ok=True)

# Append INFO-and-above records to ./log/outofmemory.log (path is relative to
# the process working directory).
logging.basicConfig(level=logging.INFO,
                    filename='./log/outofmemory.log',
                    filemode='a',
                    format='%(asctime)s - %(pathname)s[line:%(lineno)d] - %(levelname)s: %(message)s'
                    )


def wc_count(file_name):
    '''
    Return the number of lines in a text file.

    The file is opened in a ``with`` block so the handle is always closed,
    and the lines are counted while streaming instead of being loaded into
    a list first (this function is called on every polling cycle).

    :param file_name: path of the file to count.
    :return: number of lines in the file (0 for an empty file).
    '''
    with open(file_name) as f:
        return sum(1 for _ in f)


def getLastContent(file_name, lastCount):
    '''
    Return the trailing lines of a text file.

    :param file_name: path of the file to read.
    :param lastCount: maximum number of lines to keep, counted from the end
        of the file.
    :return: list with at most ``lastCount`` lines, in file order, each
        still carrying its line terminator.
    '''
    with open(file_name, "r") as handle:
        # A bounded deque drops the oldest entry once full, so after the
        # whole file has been fed in only the last ``lastCount`` lines remain.
        tail = deque(maxlen=lastCount)
        tail.extend(handle)
    return list(tail)


def filterate(list, key="OutOfMemory"):
    '''
    Tell whether any entry contains the given keyword.

    :param list: iterable of items to search (the name shadows the builtin
        but is kept because it is part of the function's interface).
    :param key: value looked up in each item with ``in``.
    :return: True if at least one item contains ``key``, otherwise False.
    '''
    return any(key in entry for entry in list)


def restart_container():
    '''
    Restart the ``zk-refactor-esl-business`` Docker container.

    The command is passed as an argument list and run without a shell.
    A failure to launch ``docker`` or a non-zero exit status is written to
    the log instead of being silently dropped; no exception is propagated,
    so the caller keeps running.

    :return: None
    '''
    command = ['docker', 'restart', 'zk-refactor-esl-business']
    try:
        result = subprocess.run(command, stderr=subprocess.PIPE)
    except OSError as exc:
        # Raised when the docker executable cannot be started at all.
        logging.error("Unable to run '%s': %s", ' '.join(command), exc)
        return

    if result.returncode != 0:
        logging.error(
            "Container restart failed (exit code %s): %s",
            result.returncode,
            result.stderr.decode(errors='replace').strip(),
        )

def monitor_log_file(log_file, interval=5):
    '''
    Poll a log file forever and react to newly appended OutOfMemory lines.

    On every cycle the current line count is compared with the count seen
    on the previous cycle. The lines added since then are scanned with
    ``filterate``; on a match the container is restarted and a DingTalk
    alert is sent.

    The first cycle starts from a count of 0, so the whole existing file is
    scanned once at start-up.

    :param log_file: path of the log file to watch.
    :param interval: seconds to sleep between polling cycles.
    :return: never returns.
    '''
    now_max_count = 0
    while True:
        try:
            new_last_count = wc_count(log_file)
            add_file_count = new_last_count - now_max_count
            if add_file_count < 0:
                # Fewer lines than last time: the file was truncated or
                # rotated, so everything currently in it is new.
                logging.warning("Log file %s shrank, rescanning it from the start", log_file)
                add_file_count = new_last_count
            logging.info("The program is normal and under continuous monitoring, add_file_count: %s",add_file_count)
            add_lins = getLastContent(log_file, add_file_count) if add_file_count > 0 else []
        except OSError as exc:
            # The file may be missing or unreadable for a moment (e.g. during
            # rotation). Keep the previous count and retry on the next cycle.
            logging.error("Unable to read %s: %s", log_file, exc)
            time.sleep(interval)
            continue

        # Only advance the position once the new lines were actually read.
        now_max_count = new_last_count

        if filterate(add_lins):
            logging.error("Program memory overflow, attempting to restart container")
            restart_container()
            try:
                webhook = ail_the_alarm.DingTalkUrl('/server/scripts/config.ini').get_config()
                ding_msg = ail_the_alarm.Send_Dingding('HK-集群环境01', webhook['mobile_number'], web_url=webhook['prod_webhook_url'],secret=webhook['prod_secret'], msg="内存溢出,尝试重启esl-business")
                ding_msg.send_dingnding()
            except Exception:
                # A failed alert must not stop the monitoring loop.
                logging.exception("Failed to send DingTalk alert")
        time.sleep(interval)


if __name__ == "__main__":
    # Script entry point: watch the esl-business error log with the default
    # polling interval.
    monitor_log_file("/usr/local/esl/zk-refactor-esl-business/log/log_error.log")

  生产环境调用钉钉告警即可

 通过systemctl管理脚本

[root@acs-hk-ctos7-prod-01 system]# pwd
/etc/systemd/system
[root@acs-hk-ctos7-prod-01 system]# cat omm.service 
[Unit]
Description=My Python Script Service
After=network.target

[Service]
Type=simple
User=root
# The script path in ExecStart is relative to this directory.
WorkingDirectory=/server/scripts/
ExecStart=/usr/bin/python3 outofmemory.py
# Start the monitor again if it exits with an error, after a short pause.
Restart=on-failure
RestartSec=5

[Install]
WantedBy=multi-user.target

 

posted @ 2023-08-03 21:00  地铁昌平线  阅读(29)  评论(0)  编辑  收藏  举报