import os
import re
import argparse
import sys
import time
# Pause (in seconds) inserted between output batches to throttle printing.
SLEEP_DURATION = 0.001  # 1 ms
MAX_LINE_LENGTH = 1024 * 1024 * 128  # 128 MiB
enable_color = False
enable_verbose = False

# Timestamp patterns that can be recognised inside a log line.  They are
# written as raw strings so the regex escapes (\s, \d, ...) are not
# interpreted as (invalid) Python string escapes.
#   1. " MM-DD HH:MM:SS:"          e.g. " 09-09 12:13:14:"
#   2. "DD/Mon/YYYY:HH:MM:SS"      e.g. "09/Sep/2022:12:13:14"
DATETIME_FORMAT_LIST = [
    r"\s(?P<month>\d+)\-(?P<day>\d+)\s(?P<hour>\d+):(?P<minute>\d+):(?P<second>\d+):",
    r"(?P<day>\d+)\/(?P<month>[A-Za-z]+)\/(?P<year>\d+):(?P<hour>\d+):(?P<minute>\d+):(?P<second>\d+)",
]

# Maps every accepted spelling of a month (English abbreviation, zero-padded,
# bare digit, space-padded) to its two-digit form.
MONTH_DICT = {
    "Jan": "01", "Feb": "02", "Mar": "03", "Apr": "04", "May": "05",
    "Jun": "06", "Jul": "07", "Aug": "08", "Sep": "09", "Oct": "10",
    "Nov": "11", "Dec": "12",
    "01": "01", "02": "02", "03": "03", "04": "04", "05": "05",
    "06": "06", "07": "07", "08": "08", "09": "09", "10": "10",
    "11": "11", "12": "12",
    "1": "01", "2": "02", "3": "03", "4": "04", "5": "05",
    "6": "06", "7": "07", "8": "08", "9": "09",
    " 1": "01", " 2": "02", " 3": "03", " 4": "04", " 5": "05",
    " 6": "06", " 7": "07", " 8": "08", " 9": "09",
}

# Maps every accepted spelling of a day of month to its two-digit form.
# Bare single digits are accepted as well, mirroring MONTH_DICT.
DAY_DICT = {
    "01": "01", "02": "02", "03": "03", "04": "04", "05": "05",
    "06": "06", "07": "07", "08": "08", "09": "09",
    "1": "01", "2": "02", "3": "03", "4": "04", "5": "05",
    "6": "06", "7": "07", "8": "08", "9": "09",
    " 1": "01", " 2": "02", " 3": "03", " 4": "04", " 5": "05",
    " 6": "06", " 7": "07", " 8": "08", " 9": "09",
    "10": "10", "11": "11", "12": "12", "13": "13", "14": "14",
    "15": "15", "16": "16", "17": "17", "18": "18", "19": "19",
    "20": "20", "21": "21", "22": "22", "23": "23", "24": "24",
    "25": "25", "26": "26", "27": "27", "28": "28", "29": "29",
    "30": "30", "31": "31",
}
#
def detect_datetime_format(line):
    """
    Pick the timestamp pattern used by a log line.

    Each entry of DATETIME_FORMAT_LIST is tried in order against ``line``.

    :param line: a line of text taken from the log file
    :return: the compiled regular expression of the first pattern that
             matches, or None when no pattern matches
    """
    for pattern_text in DATETIME_FORMAT_LIST:
        if re.search(pattern_text, line) is not None:
            return re.compile(pattern_text)
    return None
def extraction_time(line, reg, st):
    """
    Extract the timestamp of a log line as a comparable integer.

    The named groups ``year``, ``month``, ``day``, ``hour``, ``minute`` and
    ``second`` of ``reg`` are concatenated into ``YYYYMMDDHHMMSS``.  Groups
    ``year``/``month``/``day``/``hour`` that the pattern does not provide are
    borrowed from the corresponding digits of ``st``.

    :param line: text to extract the time from; None is treated as "no match"
    :param reg: compiled regular expression with the named groups above
    :param st: reference time (``YYYYMMDDHHMMSS`` as int or str) used to fill
               in fields missing from the pattern
    :return: the timestamp as an int, or None when ``line`` has no timestamp
    :raises KeyError: if the month is not a key of MONTH_DICT, or the pattern
                      provides no ``minute``/``second`` group
    """
    if line is None:
        return None
    match = reg.search(line)
    if match is None:
        return None

    st = str(st)
    defaults = {
        "year": st[:4],
        "month": st[4:6],
        "day": st[6:8],
        "hour": st[8:10],
    }
    fields = match.groupdict()
    for key, fallback in defaults.items():
        if fields.get(key) is None:
            fields[key] = fallback

    # Normalise every component to a fixed width so the concatenated digits
    # compare correctly as a number.
    fields["month"] = MONTH_DICT[fields["month"]]
    day = fields["day"]
    # The day must be looked up in DAY_DICT (MONTH_DICT only knows 1..12);
    # spellings DAY_DICT does not list are simply zero-padded.
    fields["day"] = DAY_DICT[day] if day in DAY_DICT else day.strip().zfill(2)
    for key in ("hour", "minute", "second"):
        fields[key] = fields[key].zfill(2)

    times = "{year}{month}{day}{hour}{minute}{second}".format(**fields)
    return int(times)
def forward_match(f, st, ed, regprog, ed_inclusive=True, ):
    """
    Scan forward from ``st`` for the first line matching ``regprog``.

    :param f: seekable file object
    :param st: offset at which reading starts
    :param ed: offset at which reading stops
    :param regprog: pattern (string or compiled) passed to ``re.search``
    :param ed_inclusive: when true, ``ed`` is first advanced to the start of
                         the line following the one that contains it, so that
                         line is scanned completely
    :return: ``(line, pos)``.  On a match ``line`` is the matching text and
             ``pos`` the offset where it was read from, with ``f`` positioned
             there.  Without a match ``line`` is the last text read (None if
             nothing was read) and ``pos`` is the (possibly advanced) ``ed``,
             with ``f`` positioned there.
    """
    if ed_inclusive:
        f.seek(ed)
        f.readline()  # move the cursor to the head of the next line
        ed = f.tell()  # ed now points at a line head (or EOF)
    f.seek(st)
    line = None
    while f.tell() < ed:
        # Remember where this line begins.  Seeking back to a recorded
        # offset works on text streams, whereas a negative cur-relative seek
        # is rejected by Python 3 and miscounts multi-byte characters.
        line_start = f.tell()
        pieces = []
        while f.tell() < ed:
            piece = f.readline()
            if not piece:
                break  # EOF reached before ed; avoid spinning forever
            pieces.append(piece)
            if piece.endswith("\n"):
                break
        current = "".join(pieces)
        if not current:
            break
        line = current
        if re.search(regprog, line):
            f.seek(line_start)
            return line, f.tell()
    f.seek(ed)
    return line, f.tell()
def at_line_head(f):
    """
    Tell whether the read cursor of ``f`` sits at the beginning of a line.

    Offset 0 counts as a line head; otherwise the character immediately
    before the cursor must be a newline.  The cursor is left where it was
    (one step back followed by a one-character read).
    """
    position = f.tell()
    if position == 0:
        return True
    f.seek(position - 1)
    previous_char = f.read(1)
    return previous_char == "\n"
def backward_match(f, ed, st, regprog):
    """
    Scan backwards from ``ed`` towards ``st`` for a line matching ``regprog``.

    The region is read in chunks of at most 4096 offset units, starting with
    the chunk that ends at ``ed``.  Each chunk is split into lines which are
    tested from last to first with ``re.search``.  Text at the head of a
    chunk that does not start on a line boundary is kept in ``last_buffer``
    and glued to the text read in the following (earlier) chunk.

    :param f: seekable file object
    :param ed: offset where the backward scan begins (not read itself)
    :param st: offset at which the scan stops
    :param regprog: pattern handed to ``re.search``
    :return: ``(None, None)`` when ``ed`` lies before ``st``.
             On a match: ``(line, pos)`` where ``f`` has been positioned at
             the computed start of the matching text and ``pos`` is
             ``f.tell()`` there.
             Otherwise: ``(line, pos)`` where ``line`` is the last text that
             was examined (None if nothing was) and ``f`` is positioned at
             ``st``.
    """
    backward_step_hint = 1024 * 4
    # start from the end offset
    f.seek(ed)
    if f.tell() < st:
        return None, None
    match = None
    line = None
    # offset that marks the end of the chunk read in the current round
    old_pos = f.tell()
    # cache backward read content in case failing to read a whole
    # line during a loop round
    last_buffer = ""
    backward_step = backward_step_hint  # step back 1024 * 4 per round
    while (not match) and (old_pos > st):
        new_pos = old_pos - backward_step
        if new_pos < st:
            new_pos = st
        f.seek(new_pos)
        lines = []
        cur_pos = f.tell()
        # read [new_pos, old_pos) split at line ends; readline(size) keeps
        # the read from running past old_pos
        while cur_pos < old_pos:
            size = old_pos - f.tell()
            line = f.readline(size)
            lines.append(line)
            cur_pos = f.tell()
        f.seek(new_pos)
        valid_start_index = 0
        if len(lines) == 1:
            if at_line_head(f):
                # the chunk starts on a line boundary: complete the line
                # with what was carried over from the previous round
                lines[0] = lines[0] + last_buffer
                last_buffer = ""
            else:
                # still inside the same line: keep accumulating it
                last_buffer = lines[0] + last_buffer
                lines = []
        else:
            # when len(lines) != 1, there may be the following
            # possibilities:
            # 1. lines[0] is not a complete line
            # 2. lines[0] is a complete line
            # we can judge by checking if the first character
            # of lines[0] is at line head
            lines[-1] = lines[-1] + last_buffer
            last_buffer = ""
            if not at_line_head(f) and new_pos != st:
                # lines[0] is not a complete line
                # nor does lines[0][0] at position st
                last_buffer = lines[0]
                valid_start_index = 1
        if new_pos == st and len(last_buffer) > 0:
            # new_pos == st means the loop will end
            # after this round, so we have to handle
            # data in last_buffer
            lines.append(last_buffer)
        total_lines_length = 0
        for line in lines:
            total_lines_length += len(line)
        # handle data from this round
        cur_lines_length = 0
        for index in reversed(range(valid_start_index, len(lines))):
            line = lines[index]
            cur_lines_length += len(line)
            match = re.search(regprog, line)
            if match:
                # locate f's reading pointer: new_pos plus the length of
                # everything that precedes the matching entry in ``lines``
                # NOTE(review): this assumes len(line) equals the number of
                # file-offset units the text occupies -- confirm for
                # multi-byte encodings
                f.seek(
                    total_lines_length \
                    - cur_lines_length \
                    + new_pos)
                return line, f.tell()
        # update old_pos
        old_pos = new_pos
    f.seek(st)
    return line, f.tell()
def binary_seek_pos(f, st, ed, start_time, cmp_pattern):
    """
    Binary-search ``f`` between ``st`` and ``ed`` for the boundary at which
    line timestamps become greater than or equal to ``start_time``.

    :param f: log file handle
    :param st: offset where the search region starts
    :param ed: offset where the search region ends
    :param start_time: target time, ``YYYYMMDDHHMMSS`` (int, or a string of
                       digits which is converted to int)
    :param cmp_pattern: compiled timestamp regular expression
    :return: the offset found, or None when no line with a timestamp could
             be located
    """
    # Timestamps extracted from lines are ints; make sure the target is too,
    # otherwise the comparisons below fail (or are meaningless) for strings.
    start_time = int(start_time)

    def line_time(text):
        # backward_match may hand back None when it had nothing to read.
        if text is None:
            return None
        return extraction_time(line=text, reg=cmp_pattern, st=start_time)

    while st < ed:
        # Floor division keeps the offset an int; "/" yields a float on
        # Python 3, which f.seek() rejects.
        mid = st + (ed - st) // 2
        f.seek(mid)
        line, res_pos = forward_match(f, mid, ed, cmp_pattern)
        times = line_time(line)
        if times:
            if times >= start_time:
                if res_pos == ed:
                    # Without special handling this case can loop forever:
                    # e.g. with only two lines left, the first 10 bytes and
                    # the second 100 bytes long, "mid" keeps landing in the
                    # second line; if that line's time is >= the target,
                    # "ed" would never change.  So look one line backwards
                    # and compare that one instead.
                    line, back_res_pos = backward_match(f, mid, st, cmp_pattern)
                    times = line_time(line)
                    if not times or back_res_pos == res_pos:
                        # only one line is left and it covers both st and
                        # ed; just return res_pos
                        return res_pos
                    elif back_res_pos == st:
                        # this means only two lines left, and
                        # they cover positions st and ed. just
                        # compare and decide which to return
                        if times >= start_time:
                            return st
                        else:
                            return res_pos
                    else:
                        if times >= start_time:
                            ed = back_res_pos
                        else:
                            st = back_res_pos
                else:
                    ed = res_pos
            else:
                if res_pos == st:
                    # st and ed must be covered by the same line; just
                    # return st
                    return st
                st = res_pos
        else:
            # nothing with a timestamp after mid: look before it
            line, res_pos = backward_match(f, mid, st, cmp_pattern)
            match = line_time(line)
            if not match:
                # the whole file does not contain any valid line
                return None
            # found one valid line, compare with the target time
            if match >= start_time:
                ed = res_pos
            else:
                # this line and the lines follow, until ed,
                # all locate before the target time,
                # thus return ed directly
                return ed
    return None if st > ed else ed
def get_start_and_end_pos(file, start_time, end_time, regular):
    """
    Write to stdout the part of ``file`` that lies in the requested time range.

    The timestamp format is detected from the first three lines of the file,
    the start and end offsets are located with ``binary_seek_pos`` and the
    lines in between are copied to stdout, pausing ``SLEEP_DURATION`` seconds
    after every 50 lines.

    :param file: path of the log file
    :param start_time: lower time bound, ``YYYYMMDDHHMMSS``
    :param end_time: upper time bound, ``YYYYMMDDHHMMSS``; a false value
                     means "until the end of the file"
    :param regular: accepted for interface compatibility; not used here
    :return: -1 when the date format is unsupported or a boundary cannot be
             found, otherwise None
    """
    lpms = 50  # lines written between two pauses
    with open(file, "r") as f:
        # detect which timestamp pattern this file uses
        datetime_pattern = None
        for _ in range(3):
            line = f.readline().strip("\n")
            datetime_pattern = detect_datetime_format(line)
            if datetime_pattern is not None:
                break
        if datetime_pattern is None:
            sys.stderr.write("log date format is not"
                             " supported,file:%s\n" % file)
            return -1

        # the search region is the whole file
        start_pos = 0
        f.seek(0, os.SEEK_END)
        end_pos = f.tell()

        # get start read position of the file
        start_read_pos = binary_seek_pos(f=f, st=start_pos, ed=end_pos,
                                         cmp_pattern=datetime_pattern,
                                         start_time=start_time)
        if start_read_pos is None:
            sys.stderr.write("Error: no matching start line for reading.\n")
            return -1

        # get end read position of the file
        if not end_time:
            end_read_pos = end_pos
        else:
            end_read_pos = binary_seek_pos(f=f, st=start_read_pos, ed=end_pos,
                                           cmp_pattern=datetime_pattern,
                                           start_time=end_time)
            if end_read_pos is None:
                sys.stderr.write("Error: no matching end line for reading.\n")
                return -1

        # copy the selected region to stdout
        f.seek(start_read_pos)
        line_count = 0  # lines written since the last pause
        while f.tell() < end_read_pos:
            line = f.readline()
            if not line:
                break  # unexpected EOF; nothing more to copy
            if f.tell() > end_read_pos:
                break
            sys.stdout.write(line)
            line_count += 1
            # throttle the output rate
            if lpms > 0 and line_count >= lpms:
                line_count = 0
                time.sleep(SLEEP_DURATION)
def _exit_on_bad_time(label, value):
    """Exit with status 1 unless ``value`` parses as ``%Y%m%d%H%M%S``."""
    try:
        time.strptime(value, '%Y%m%d%H%M%S')
    except (TypeError, ValueError):
        # The literal percent signs are doubled: the message itself goes
        # through "%" formatting, where a bare "%Y" is an invalid directive.
        sys.stderr.write(
            "%s:%s wrong format,"
            "the supported time formats "
            "are %%Y%%m%%d%%H%%M%%S, eg:20220909121314\n" % (label, value))
        sys.exit(1)


def check_param(file_list, start_time, end_time):
    """
    Validate the command line parameters.

    Writes a message to stderr and exits with status 1 on the first problem.

    :param file_list: paths that must all be existing regular files
    :param start_time: start time, must match ``%Y%m%d%H%M%S``
    :param end_time: end time in the same format, or None to skip the check
    :return: None
    :raises SystemExit: when a file is missing or a time is malformed
    """
    # every file must exist
    for file in file_list:
        if not os.path.isfile(file):
            sys.stderr.write("file path not exist,%s\n" % file)
            sys.exit(1)
    _exit_on_bad_time("start_time", start_time)
    if end_time is not None:
        _exit_on_bad_time("end_time", end_time)
def file_handle(cmd_namespace):
    """
    Validate the parsed arguments and process every requested file.

    :param cmd_namespace: namespace with the attributes ``file``, ``start``,
                          ``end``, ``regular`` and ``check``
    :return: None
    """
    # fetch the parameters
    file_list = getattr(cmd_namespace, "file")
    start_time = getattr(cmd_namespace, "start")
    end_time = getattr(cmd_namespace, "end")
    regular = getattr(cmd_namespace, "regular")
    check = getattr(cmd_namespace, "check")
    # validate the parameters (exits on failure)
    check_param(file_list, start_time, end_time)
    # echo the parameters when requested
    if check in [1, '1']:
        sys.stdout.write("file_list:%s\n" % file_list)
        sys.stdout.write("start_time:%s\n" % start_time)
        sys.stdout.write("end_time:%s\n" % end_time)
        sys.stdout.write("regular:%s\n" % regular)
    # Extracted line times are ints, so hand the bounds over as ints too.
    start_value = int(start_time)
    end_value = int(end_time) if end_time is not None else None
    # NOTE(review): the loop body was missing from the reviewed source;
    # calling get_start_and_end_pos per file is the presumed intent -- confirm.
    for file in file_list:
        get_start_and_end_pos(file, start_value, end_value, regular)
def init_parser(target_parser):
    """
    Register the command line options on ``target_parser``.

    Options: ``-f/--file`` (one or more paths, required),
    ``-s/--start-datetime`` (required), ``-e/--end-datetime``,
    ``-r/--reg`` and ``-c/--check`` (int, defaults to 0).

    :param target_parser: an ``argparse.ArgumentParser`` to configure
    :return: None
    """
    target_parser.add_argument("-f", "--file", nargs="+",
                               dest="file", required=True,
                               help="-f file1 file2 ")
    target_parser.add_argument("-s", "--start-datetime", dest="start",
                               required=True, help="-s 20220909122300")
    target_parser.add_argument("-e", "--end-datetime", dest="end",
                               required=False, help="-e 20220909122300")
    target_parser.add_argument("-r", "--reg", dest="regular",
                               required=False, help="-r uri=\\s+,"
                                                    "Match regular from results")
    # ``default`` (not ``const``) provides the documented "default 0":
    # argparse rejects ``const`` on a store action unless nargs is '?'.
    target_parser.add_argument("-c", "--check", dest="check", required=False,
                               default=0, type=int, help="-c 1, Check"
                                                         " parameters,1:print"
                                                         " parameters,0:not "
                                                         "output parameters,default 0")
def main():
    """
    Command line entry point: build the parser, parse ``sys.argv`` and hand
    the resulting namespace to ``file_handle``.

    :return: None
    """
    description = ("Usage: "
                   "logcat -s '20220101000000' -e '20220101010101' -f "
                   " LOGFILE1.log LOGFILE2.log ... ")
    parser = argparse.ArgumentParser(description=description)
    init_parser(parser)
    file_handle(parser.parse_args())
if __name__ == "__main__":
    try:
        sys.exit(main())
    except Exception as ex:
        # Report the failure instead of exiting silently; the exit status is
        # kept as before.
        sys.stderr.write("Error: %s\n" % ex)
        sys.exit(-255)