# -*- coding: utf-8 -*-
# =============================================================================
# Desc: 格式化MySQL通用日志并统计SQL执行频率
# Author: GGA
# Email:
# HomePage:
# Version: 1.0.1
# LastChange: 2020-12-20
# History:
# =============================================================================
import sys
import re
import os
import fire
import hashlib
import argparse
import logging
class SQLFingerPrint(object):
SQL_PATTERNS = [
r"'(''|\\\\|\\'|[^'])*'",
r'"(""|\\\\|\\"|[^"])*"',
r'(""|".*?[^\\]")',
r'(?![_A-ZÀ-Ü])-?\d+(?![_A-ZÀ-Ü])',
r'(?![_A-ZÀ-Ü])-?(\d+(\.\d*)|\.\d+)(?![_A-ZÀ-Ü])',
r'-?\d+(\.\d+)?E-?\d+',
r'-?0x[\dA-F]+',
]
@classmethod
def sample_format_sql(cls, sql):
tmp_sql = sql
for item in cls.SQL_PATTERNS:
tmp_sql = re.sub(item, "'?'", tmp_sql)
tmp_sql = tmp_sql.replace("\r", " ").replace("\n", " ").replace("\t", " ")
tmp_sql = tmp_sql.replace("'?'", " '?' ").replace(")", " ) ").replace("(", (" ( "))
tmp_sql = re.sub(r"\s+", " ", tmp_sql)
tmp_sql = tmp_sql.replace("'?' ,", "")
tmp_sql = re.sub(r"\s+", " ", tmp_sql)
tmp_sql = tmp_sql.replace(",(", ", (").replace(", ( '?' )", "")
tmp_sql = re.sub(r"\s+", " ", tmp_sql)
return tmp_sql
class GeneralLogHelper(object):
QUERY_START_TAG = " Query\t"
QUERY_TIME_PATTERN = r"^[0-9]{6} [0-9]{2}:[0-9]{2}:[0-9]{2}"
CHECK_SQLS = [
"select @@session.tx_read_only",
"select 1",
"set autocommit=1",
"set autocommit=0",
"commit"
]
@classmethod
def is_check_sql(cls, sql_text: str):
for tmp_sql in cls.CHECK_SQLS:
if sql_text.lower().find(tmp_sql.lower()) >= 0:
return True
return False
@classmethod
def check_env(cls, source_log_path, target_log_path, static_log_path):
if not os.path.exists(source_log_path):
print("源文件不存在,请检查")
return False
if os.path.exists(target_log_path):
print("目标文件已存在,请检查")
return False
if os.path.exists(static_log_path):
print("统计文件已存在,请检查")
return False
return True
@classmethod
def update_sql_map(cls, sql_map: dict, sql_text):
sql_pf = SQLFingerPrint.sample_format_sql(sql=sql_text)
sql_md5 = hashlib.md5(sql_pf.encode(encoding='UTF-8')).hexdigest()
if sql_md5 in sql_map.keys():
sql_map[sql_md5]["exec_count"] += 1
else:
sql_map[sql_md5] = dict()
sql_map[sql_md5]["exec_count"] = 1
sql_map[sql_md5]["finger_print"] = sql_pf
sql_map[sql_md5]["sample_sql"] = sql_text
@classmethod
def get_sql_map_print_info(cls, sql_map: dict):
print_info = []
sql_items = sorted(sql_map.values(), key=lambda ietm: ietm["exec_count"], reverse=True)
for sql_item in sql_items:
print_info.append("*" * 100 + "")
print_info.append("指纹SQL: {0}".format(sql_item["finger_print"]))
print_info.append("示例SQL: {0}".format(sql_item["sample_sql"]))
print_info.append("执行次数: {0}".format(sql_item["exec_count"]))
return print_info
@classmethod
def append_file_content(cls, file_path, file_content):
with open(file=file_path, encoding="utf-8", mode="a+") as fw:
fw.write(file_content)
@classmethod
def format_sql(cls, source_log_path, target_log_path, static_log_path, ignore_cheek_sql=True):
print("处理开始。。。")
if not cls.check_env(
source_log_path=source_log_path,
target_log_path=target_log_path,
static_log_path=static_log_path
):
return
sql_text = ""
query_time = ""
cache_lines = []
line_num = 0
new_query_flah = False
sql_map = dict()
with open(file=source_log_path, encoding="utf-8", mode="r+") as slh:
for line in slh.readlines():
if not line:
break
line_num += 1
if line_num % 100 == 0:
print("处理第{}行记录".format(line_num))
if len(cache_lines) > 100:
cls.append_file_content(
file_path=target_log_path,
file_content="\n".join(cache_lines)
)
cache_lines = []
if re.match(cls.QUERY_TIME_PATTERN, line):
query_time = re.search(cls.QUERY_TIME_PATTERN, line).group()
if line.find(cls.QUERY_START_TAG) > 0:
if not new_query_flah:
new_query_flah = True
continue
sql_text = re.sub(
r"\s+", " ",
str(sql_text).replace("\r", " ").replace("\n", " ").replace("\t", " "))
if query_time == "":
continue
if not (ignore_cheek_sql and cls.is_check_sql(sql_text)):
cls.update_sql_map(sql_map, sql_text)
cache_lines.append(query_time + "===>" + sql_text)
sql_index = str(line).index(cls.QUERY_START_TAG) + len(cls.QUERY_START_TAG)
query_header = line[sql_index:-1]
sql_text = line[sql_index:]
else:
sql_text = sql_text + " " + line
cls.append_file_content(
file_path=target_log_path,
file_content="\n".join(cache_lines)
)
print_info = cls.get_sql_map_print_info(sql_map=sql_map)
cls.append_file_content(
file_path=static_log_path,
file_content="\n".join(print_info)
)
print("处理完成。。。")
logger = logging.getLogger()
def init_logger():
logger.setLevel(level=logging.DEBUG)
logger_format = logging.Formatter("[%(asctime)s]-[%(levelname)s]: %(message)s") # output format
sh = logging.StreamHandler(stream=sys.stdout) # output to standard output
sh.setFormatter(logger_format)
logger.addHandler(sh)
def get_parser():
parser = argparse.ArgumentParser(
description='格式化MySQL通用日志并汇总SQL执行情况',
add_help=False)
connect_setting = parser.add_argument_group('参数信息')
parser.add_argument(
'--help',
dest='help',
action='store_true',
help='获取帮助信息',
default=False
)
connect_setting.add_argument(
'--source_log_path',
dest='source_log_path',
type=str,
default=None,
help='通用日志文件路径,无默认值,需指定。'
)
connect_setting.add_argument(
'--target_log_path',
dest='target_log_path',
type=str,
default="./sql_format.log",
help='目标日志文件路径,默认为./sql_format.log。'
)
connect_setting.add_argument(
'--static_log_path',
dest='static_log_path',
type=str,
default="./sql_stats.log",
help='统计日志文件路径,默认为./sql_stats.log。'
)
connect_setting.add_argument(
'--ignore_cheek_sql',
dest='ignore_cheek_sql',
type=bool,
default=True,
help='是否忽略客户端检查SQL,默认忽略。'
)
return parser
def parse_args(command_args):
need_print_help = False if command_args else True
parser = get_parser()
args = parser.parse_args(command_args)
if args.help or need_print_help:
parser.print_help()
sys.exit(1)
if args.source_log_path is None:
logger.warning("请输入通用日志文件路径")
parser.print_help()
return args
def main(command_args):
args = parse_args(command_args)
GeneralLogHelper.format_sql(
source_log_path=args.source_log_path,
target_log_path=args.target_log_path,
static_log_path=args.static_log_path,
ignore_cheek_sql=args.ignore_cheek_sql
)
if __name__ == '__main__':
init_logger()
main(sys.argv[1:])