对binlog日志进行统计

# command format: python define_name.py file_name
import sys
import re
f = open(sys.argv[1])
update_list=[]
update_list2=[]
insert_list=[]
insert_list2=[]
delete_list=[]
delete_list2=[]
#line = f.readline()
#while len(line)!=0:
#print sys.argv[1,2,3,4,5,6,7,8,9]
for line in f:
    if not line.strip():
        continue
    line = re.sub('`','',line)
    v_par =re.match("(\s*)(\S+)(\s*)(.*)",line,re.I)
    if v_par.group(2).lower() != 'update' and  v_par.group(2).lower() != 'delete' and  v_par.group(2).lower() !='insert':
        #print line,
        continue
    elif v_par.group(2).lower()=='update':
        c = re.match("(\w+)(\s+)(\w+\.)?(\w+)",line,re.I)
        update_list.append(c.group(4).lower())
    elif v_par.group(2).lower()=='insert':
        c = re.match("(\w+)(\s*)(into)?(\s+)(\w+\.)?(\w+)(\s*\()?",line,re.I)
        insert_list.append(c.group(6).lower())
    elif v_par.group(2).lower()=='delete':
        c = re.match("(\w+)(.*)(from)(\s+)(\w+\.)?(\w+)",line,re.I)
        delete_list.append(c.group(6).lower())

print 'TABLE_NAME','EXEC_COUNT','DML_TYPE'
if len(update_list) > 0:
    update_list2=list(set(update_list))
    for ii in range(len(update_list2)):
        print update_list2[ii] ,update_list.count(update_list2[ii]),"update"


if len(insert_list) > 0:
    insert_list2=list(set(insert_list))
    for ii in range(len(insert_list2)):
        print insert_list2[ii] ,insert_list.count(insert_list2[ii]),"insert"


if len(delete_list) > 0:
    delete_list2=list(set(delete_list))
    for ii in range(len(delete_list2)):
        print delete_list2[ii] ,delete_list.count(delete_list2[ii]),"delete"

f.close();

运行脚本  python zhenban.py log001.log    ##前面是脚本的名字,后面是binlog的名字

得出的结果是

需要自己用excel去进行分列和排序

posted @ 2017-03-02 13:48  糖饼好吃  阅读(496)  评论(0)    收藏  举报