python-使用正则快速解析QQ群聊记录

使用正则表达式,对QQ群聊天记录进行解析,用于分析日期、成员等维度发言情况。

原始文本是
2014-03-28 15:04:25 №┽◎Eagle(369029696)

解析之后
yyyy = 2014
mm = 03
dd = 28
hh = 15
mi = 04
ss = 25
nick = №┽◎Eagle
qq = 369029696

 

 

代码如下

# -*- coding: utf-8 -*-  
""" 
 zhangbo2012
 http://www.cnblogs.com/zhangbo2012/
"""
import re

def resolving_by_user(filepath):
    """Count chat messages per QQ number in an exported QQ group chat log.

    Every message header of the form
    ``2014-03-28 15:04:25 nick(369029696)`` found in the file is attributed
    to the QQ number inside the parentheses. A right-aligned table of QQ
    number and message count is printed, one line per QQ number.

    Args:
        filepath: Path of the chat log text file to read.

    Returns:
        A dict keyed by QQ number (str). Each value is a dict with the keys
        ``"qq"``, ``"nick"`` (the nickname on the last header matched for
        that number) and ``"worldcnt"`` (the number of headers matched).
    """
    with open(filepath, 'r') as rf:
        filecontent = rf.read()

    resolving_result = {}

    # Example header: 2014-03-28 15:04:25 №┽◎Eagle(369029696)
    # The header may end with LF, CRLF, or the end of the input, so CRLF
    # files and a header on the last line without a trailing newline are
    # still counted.
    p = re.compile(
        r'(\d{4})-(\d{2})-(\d{2}) (\d{2}):(\d{2}):(\d{2}) '
        r'(.*)\((.*?)\)\r?(?:\n|\Z)'
    )
    for match in p.finditer(filecontent):
        # Only the nickname and QQ number are needed for per-user counting.
        nick, qq = match.group(7, 8)
        entry = resolving_result.setdefault(
            qq, {"qq": qq, "nick": nick, "worldcnt": 0})
        # Keep the most recent nickname; users may rename themselves.
        entry["nick"] = nick
        entry["worldcnt"] += 1

    for value in resolving_result.values():
        # A single parenthesised argument is valid on Python 2 and Python 3.
        print(repr(value['qq']).rjust(15) + repr(value['worldcnt']).rjust(10))

    return resolving_result

if __name__=='__main__':
    # Chat log path: first command-line argument if given, else "2.txt".
    import sys
    resolving_by_user(sys.argv[1] if len(sys.argv) > 1 else "2.txt")
 
posted @ 2014-04-30 10:07  zhangbo2012  阅读(546)  评论(0)  编辑  收藏  举报