记录一次利用 Python 开发日志解析模块的过程

只记录大体思路和我认为其中需要记录的地方。

正则匹配

正则匹配的模式很难记忆,即使记住了,也很难一次写出没有错误的匹配模式。不过,可以借助网上一些提供实时匹配预览的网站(如 regexr.com)边写边验证。
代码示意:

import os
import re

# Compiled pattern for one "get_imgpath" timing record in a log line.
# findall() yields 6-tuples indexed as:
#   0 whole match, 1 timestamp, 2 text before the keyword,
#   3 elapsed time, 4 image directory, 5 image path.
get_imgpath_regex = re.compile(r'''(
    (\d{4}-\d{2}-\d{2}\s\d{2}:\d{2}:\d{2},\d+)      # timestamp
    \s      # separator
    (.*)?get_imgpath\sused\stime\sis\s      # leading info
    (\d+\.\d+)       # elapsed time; the decimal point is matched literally
    \sargs\sis\s\(u'     # separator
    (.*?)',\)\sresult\sis\s     # img dir
    (.*?\.jpg)      # img path; must end in a literal .jpg suffix
)''', re.VERBOSE)

# Plain substring that identifies candidate lines before the regex is applied.
get_imgpath_flag = 'get_imgpath'

def main():
    """Collect every get_imgpath record found in the input log files.

    Iterates over the files in ``infile_list`` and, for each line that
    contains ``get_imgpath_flag``, appends one dict per regex match to
    ``get_imgpath_match`` with the keys ``date``, ``cost_time``,
    ``img_dir`` and ``img_path``.

    NOTE(review): ``infile_list``, ``regex`` and ``get_imgpath_match`` are
    not defined in this snippet; they are assumed to exist at module level,
    with ``regex`` mapping pattern names to compiled patterns -- confirm.
    """
    for infile in infile_list:
        # parse each line using the compiled regex
        with open(infile, "r") as file:
            for line in file:
                # check for the keyword first; skip lines without it
                if get_imgpath_flag not in line:
                    continue
                # then extract the fields captured by the pattern
                for groups in regex['get_imgpath_regex'].findall(line):
                    get_imgpath_match.append({
                        'date': groups[1],
                        'cost_time': groups[3],
                        'img_dir': groups[4],
                        'img_path': groups[5],
                    })

参数配置

采取的方式是将参数存储在一个单独的文件中,如 config.json。

{
    "FLAG" : {
        "SAVE_SPILT_LOG_FILE_FLAG" : false ,
        "SAVE_MERGE_LOG_FILE_FLAG" : false ,
        "USE_CURRENT_PATH" : false
    },

    "PATH" : {
        "INPUT_LOG_FILE_PATH" : "E:\\zwk\\Code\\logger_read\\data\\pro_data" ,
        "SAVE_SPILT_MERGE_LOG_PATH" : "E:\\zwk\\Code\\logger_read\\output\\spilt_merge_log" ,
        "OUTPUT_RESULT_PATH" : "E:\\zwk\\Code\\logger_read\\output" 
    },
   
    "PARAMETERS" : {
        "windows_size" : 2 ,
        "duplicate_times" : 1 
    }
}

再对参数进行解析:

import json

def main():
    """Resolve the configured paths, optionally against the working directory.

    Reads the module-level ``config`` dict. When
    ``config['FLAG']['USE_CURRENT_PATH']`` is truthy, the three entries of
    ``config['PATH']`` are overwritten in place with locations derived from
    the current working directory; otherwise ``config`` is left untouched.
    """
    if config['FLAG']['USE_CURRENT_PATH']:
        # Query the working directory once and reuse it for all three paths.
        cwd = os.getcwd()
        paths = config['PATH']
        paths['INPUT_LOG_FILE_PATH'] = os.path.join(cwd, 'data')
        paths['SAVE_SPILT_MERGE_LOG_PATH'] = os.path.join(cwd, 'output')
        paths['OUTPUT_RESULT_PATH'] = cwd

    # Presumably consumed by later processing that is not shown here.
    output_file_path = config['PATH']
    flag = config['FLAG']

if __name__ == '__main__':
    # Look for config.json next to this script, so the lookup does not
    # depend on the directory the script is launched from.
    this_folder = os.path.dirname(os.path.abspath(__file__))
    config_file = os.path.join(this_folder, 'config.json')
    exists_check = os.path.isfile(config_file)
    if not exists_check:
        print('Error: loss of config file, Exit !!!')
        # Actually stop here: without this, execution fell through to
        # open() below and died with an unrelated FileNotFoundError.
        raise SystemExit(1)
    with open(config_file, 'r') as f:
        config = json.load(f)
    # improve it, use as global variable
    parameters = config['PARAMETERS']
    main()

目录、文件名

在这里,给出一个我认为还不错的做法,主要优点是不受操作系统影响:

    # Build the config path from this script's own location using os.path,
    # so no path separator is hard-coded.
    this_folder = os.path.dirname(os.path.abspath(__file__))
    config_file = os.path.join(this_folder, 'config.json')
    exists_check = os.path.isfile(config_file)
    if not exists_check:
        print('Error: loss of config file, Exit !!!')
        # Stop instead of falling through to open() on a missing file.
        raise SystemExit(1)
    with open(config_file, 'r') as f:
        config = json.load(f)

    # (intermediate steps omitted)
    # write output to files
    # exist_ok=True: do not fail when the output directory already exists.
    os.makedirs(outfile['SAVE_SPILT_MERGE_LOG_PATH'], exist_ok=True)
posted @ 2019-07-04 17:14  不燥不怕  阅读(196)  评论(0)  编辑  收藏  举报