python--格式化MySQL通用日志

# -*- coding: utf-8 -*-
# =============================================================================
#         Desc: 格式化MySQL通用日志并统计SQL执行频率
#       Author: GGA
#        Email:
#     HomePage:
#      Version: 1.0.1
#   LastChange: 2020-12-20
#      History:
# =============================================================================

import sys
import re
import os
import fire
import hashlib
import argparse
import logging


class SQLFingerPrint(object):
    SQL_PATTERNS = [
        r"'(''|\\\\|\\'|[^'])*'",
        r'"(""|\\\\|\\"|[^"])*"',
        r'(""|".*?[^\\]")',
        r'(?![_A-ZÀ-Ü])-?\d+(?![_A-ZÀ-Ü])',
        r'(?![_A-ZÀ-Ü])-?(\d+(\.\d*)|\.\d+)(?![_A-ZÀ-Ü])',
        r'-?\d+(\.\d+)?E-?\d+',
        r'-?0x[\dA-F]+',
    ]

    @classmethod
    def sample_format_sql(cls, sql):
        tmp_sql = sql
        for item in cls.SQL_PATTERNS:
            tmp_sql = re.sub(item, "'?'", tmp_sql)
        tmp_sql = tmp_sql.replace("\r", " ").replace("\n", " ").replace("\t", " ")
        tmp_sql = tmp_sql.replace("'?'", " '?' ").replace(")", " ) ").replace("(", (" ( "))
        tmp_sql = re.sub(r"\s+", " ", tmp_sql)
        tmp_sql = tmp_sql.replace("'?' ,", "")
        tmp_sql = re.sub(r"\s+", " ", tmp_sql)
        tmp_sql = tmp_sql.replace(",(", ", (").replace(", ( '?' )", "")
        tmp_sql = re.sub(r"\s+", " ", tmp_sql)
        return tmp_sql


class GeneralLogHelper(object):
    QUERY_START_TAG = " Query\t"
    QUERY_TIME_PATTERN = r"^[0-9]{6} [0-9]{2}:[0-9]{2}:[0-9]{2}"
    CHECK_SQLS = [
        "select @@session.tx_read_only",
        "select 1",
        "set autocommit=1",
        "set autocommit=0",
        "commit"
    ]

    @classmethod
    def is_check_sql(cls, sql_text: str):
        for tmp_sql in cls.CHECK_SQLS:
            if sql_text.lower().find(tmp_sql.lower()) >= 0:
                return True
        return False

    @classmethod
    def check_env(cls, source_log_path, target_log_path, static_log_path):
        if not os.path.exists(source_log_path):
            print("源文件不存在,请检查")
            return False
        if os.path.exists(target_log_path):
            print("目标文件已存在,请检查")
            return False
        if os.path.exists(static_log_path):
            print("统计文件已存在,请检查")
            return False
        return True

    @classmethod
    def update_sql_map(cls, sql_map: dict, sql_text):
        sql_pf = SQLFingerPrint.sample_format_sql(sql=sql_text)
        sql_md5 = hashlib.md5(sql_pf.encode(encoding='UTF-8')).hexdigest()
        if sql_md5 in sql_map.keys():
            sql_map[sql_md5]["exec_count"] += 1
        else:
            sql_map[sql_md5] = dict()
            sql_map[sql_md5]["exec_count"] = 1
            sql_map[sql_md5]["finger_print"] = sql_pf
            sql_map[sql_md5]["sample_sql"] = sql_text

    @classmethod
    def get_sql_map_print_info(cls, sql_map: dict):
        print_info = []
        sql_items = sorted(sql_map.values(), key=lambda ietm: ietm["exec_count"], reverse=True)
        for sql_item in sql_items:
            print_info.append("*" * 100 + "")
            print_info.append("指纹SQL:    {0}".format(sql_item["finger_print"]))
            print_info.append("示例SQL:    {0}".format(sql_item["sample_sql"]))
            print_info.append("执行次数:    {0}".format(sql_item["exec_count"]))
        return print_info

    @classmethod
    def append_file_content(cls, file_path, file_content):
        with open(file=file_path, encoding="utf-8", mode="a+") as fw:
            fw.write(file_content)

    @classmethod
    def format_sql(cls, source_log_path, target_log_path, static_log_path, ignore_cheek_sql=True):
        print("处理开始。。。")
        if not cls.check_env(
                source_log_path=source_log_path,
                target_log_path=target_log_path,
                static_log_path=static_log_path
        ):
            return
        sql_text = ""
        query_time = ""
        cache_lines = []
        line_num = 0
        new_query_flah = False
        sql_map = dict()
        with open(file=source_log_path, encoding="utf-8", mode="r+") as slh:
            for line in slh.readlines():
                if not line:
                    break
                line_num += 1
                if line_num % 100 == 0:
                    print("处理第{}行记录".format(line_num))
                if len(cache_lines) > 100:
                    cls.append_file_content(
                        file_path=target_log_path,
                        file_content="\n".join(cache_lines)
                    )
                    cache_lines = []
                if re.match(cls.QUERY_TIME_PATTERN, line):
                    query_time = re.search(cls.QUERY_TIME_PATTERN, line).group()
                if line.find(cls.QUERY_START_TAG) > 0:
                    if not new_query_flah:
                        new_query_flah = True
                        continue
                    sql_text = re.sub(
                        r"\s+", " ",
                        str(sql_text).replace("\r", " ").replace("\n", " ").replace("\t", " "))
                    if query_time == "":
                        continue
                    if not (ignore_cheek_sql and cls.is_check_sql(sql_text)):
                        cls.update_sql_map(sql_map, sql_text)
                        cache_lines.append(query_time + "===>" + sql_text)
                    sql_index = str(line).index(cls.QUERY_START_TAG) + len(cls.QUERY_START_TAG)
                    query_header = line[sql_index:-1]
                    sql_text = line[sql_index:]
                else:
                    sql_text = sql_text + " " + line
        cls.append_file_content(
            file_path=target_log_path,
            file_content="\n".join(cache_lines)
        )
        print_info = cls.get_sql_map_print_info(sql_map=sql_map)
        cls.append_file_content(
            file_path=static_log_path,
            file_content="\n".join(print_info)
        )
        print("处理完成。。。")


logger = logging.getLogger()


def init_logger():
    logger.setLevel(level=logging.DEBUG)
    logger_format = logging.Formatter("[%(asctime)s]-[%(levelname)s]: %(message)s")  # output format
    sh = logging.StreamHandler(stream=sys.stdout)  # output to standard output
    sh.setFormatter(logger_format)
    logger.addHandler(sh)


def get_parser():
    parser = argparse.ArgumentParser(
        description='格式化MySQL通用日志并汇总SQL执行情况',
        add_help=False)
    connect_setting = parser.add_argument_group('参数信息')
    parser.add_argument(
        '--help',
        dest='help',
        action='store_true',
        help='获取帮助信息',
        default=False
    )
    connect_setting.add_argument(
        '--source_log_path',
        dest='source_log_path',
        type=str,
        default=None,
        help='通用日志文件路径,无默认值,需指定。'
    )
    connect_setting.add_argument(
        '--target_log_path',
        dest='target_log_path',
        type=str,
        default="./sql_format.log",
        help='目标日志文件路径,默认为./sql_format.log。'
    )
    connect_setting.add_argument(
        '--static_log_path',
        dest='static_log_path',
        type=str,
        default="./sql_stats.log",
        help='统计日志文件路径,默认为./sql_stats.log。'
    )
    connect_setting.add_argument(
        '--ignore_cheek_sql',
        dest='ignore_cheek_sql',
        type=bool,
        default=True,
        help='是否忽略客户端检查SQL,默认忽略。'
    )
    return parser


def parse_args(command_args):
    need_print_help = False if command_args else True
    parser = get_parser()
    args = parser.parse_args(command_args)
    if args.help or need_print_help:
        parser.print_help()
        sys.exit(1)
    if args.source_log_path is None:
        logger.warning("请输入通用日志文件路径")
        parser.print_help()
    return args


def main(command_args):
    args = parse_args(command_args)
    GeneralLogHelper.format_sql(
        source_log_path=args.source_log_path,
        target_log_path=args.target_log_path,
        static_log_path=args.static_log_path,
        ignore_cheek_sql=args.ignore_cheek_sql
    )


if __name__ == '__main__':
    init_logger()
    main(sys.argv[1:])

posted @ 2021-11-15 22:05  TeyGao  阅读(149)  评论(0)    收藏  举报