逆水行舟,不进则退

纸上得来终觉浅,绝知此事要躬行
  博客园  :: 首页  :: 新随笔  :: 联系 :: 订阅 订阅  :: 管理

QQ群成员发言次数统计(正则表达式版)

Posted on 2013-09-16 12:12  Jonee  阅读(4274)  评论(5)  编辑  收藏  举报

1、先将QQ群的消息记录以.txt文件格式导出来,保存路径及名称自己定义(在本文我导出到Y盘,命名为test.txt)

2、程序如下:

/* Scan the exported chat log for lines containing "nickname(digits)".
   Each matching line yields one row holding the matched text without its
   leading whitespace character (name) and the digits found between the
   first pair of parentheses (qq). */
data statistics1;
    retain patternid1 patternid2;
    if _n_ = 1 then do;
        /* compile both patterns once; the ids are retained for later rows */
        patternid1 = prxparse("/\s\w*-*\w*-*\w*\(\d+\)/");
        patternid2 = prxparse("/\(\d+\)/");
    end;

    infile 'Y:\test.txt' truncover;
    input string $200.;
    length name $50. qq $30.;

    call prxsubstr(patternid1, string, start1, length1);
    call prxsubstr(patternid2, string, start2, length2);

    /* PRXSUBSTR reports position 0 when the pattern was not found */
    if start1 > 0 and start2 > 0 then do;
        name = substrn(string, start1 + 1, length1 - 1);  /* skip the leading whitespace */
        qq   = substrn(string, start2 + 1, length2 - 2);  /* drop "(" and ")" */
        output;
    end;
run;

/* Scan the exported chat log for lines containing "nickname<local@domain.suffix>".
   Each matching line yields one row holding the matched text without its
   leading whitespace character (name) and the address found between the
   angle brackets (qq).

   The dot between domain and suffix is escaped (\.) so that it matches a
   literal "." only; unescaped, it would accept any character there. */
data statistics2;
    if _n_=1 then do;
        /* compile both patterns once; the ids are retained for later rows */
        patternid1=prxparse("/\s\w*-*\w*-*\w*<(\w*|\d*)@(\w*|\d*)\.\w+>/");
        patternid2=prxparse("/<(\w*|\d*)@(\w*|\d*)\.\w+>/");
    end;
    retain patternid1 patternid2;
    infile 'Y:\test.txt' truncover;
    input string $200.;
    length name $50. qq $30.;
    call prxsubstr(patternid1,string,start1,length1);
    call prxsubstr(patternid2,string,start2,length2);
    /* PRXSUBSTR reports position 0 when the pattern was not found */
    if start1 gt 0 and start2 gt 0 then do;
        name=substrn(string,start1+1,length1-1);  /* skip the leading whitespace */
        qq=substrn(string,start2+1,length2-2);    /* drop "<" and ">" */
        output;
    end;
run;

/* Read the text that follows the marker on line 6 of the export and store
   it in the macro variable _qunname (used in the report title).

   - An explicit length is declared because list input of an undeclared
     character variable defaults to 8 bytes, which truncates longer names.
   - CALL SYMPUTX strips leading/trailing blanks, so the padded value does
     not leak into the title text. */
data qunname;
    length qunname $200;
    infile 'Y:\test.txt' truncover firstobs=6 obs=6;
    input @'消息对象:' qunname $;
    call symputx('_qunname',qunname);
run;

/* Stack both extracts, leave out rows whose qq is '10000', and number the
   remaining rows in read order (n) so that order can be restored after
   sorting. Only name, qq and n are kept. */
data statistics;
    set statistics1 statistics2;
    where qq ne '10000';
    n = _n_;
    drop patternid1 patternid2 string start1 start2 length1 length2;
run;

/* Order rows by qq, then by their original sequence number. */
proc sort data=statistics;
    by qq n;
run;

/* Keep one row per qq: the last one in (qq, n) order, i.e. the row with
   the highest sequence number, which carries the most recent name. */
data match(drop=n);
    set statistics;
    by qq n;
    if last.qq;
run;

/* Store the row count of STATISTICS in the macro variable _nobs.

   NOBS= is filled in at compile time, so the SET statement never needs to
   execute ("if 0"). This also works when STATISTICS is empty; an executed
   SET would hit end-of-file and end the step before the macro variable is
   assigned. CALL SYMPUTX converts the number without a log note and
   without leading blanks. */
data _null_;
    if 0 then set statistics nobs=t;
    call symputx("_nobs",t);
    stop;
run;

/* Per qq: number of rows (frequency) and its share of all rows in
   percent (rate). The result is ordered by qq for the merge that follows. */
proc sql;
    create table rtf as
        select qq,
               count(qq)                 as frequency,
               count(qq) / &_nobs. * 100 as rate
        from statistics
        group by qq
        order by qq;
quit;

/* Attach the columns of MATCH to the per-qq counts, matching rows on qq. */
data rtf;
    merge rtf
          match;
    by qq;
run;

/* Highest frequency first. */
proc sort data=rtf;
    by descending frequency;
run;

/* Write the frequency table to an RTF file.
   Titles and footnotes are cleared, date/page number are suppressed, and
   the listing destination is closed while the RTF destination is open.
   NOTE(review): style Mystyles is not defined in this program; it is
   assumed to exist in the session's template store. */
title;
footnote;
options nodate nonumber;
ods results=off;
ods listing close;
ods rtf file='Y:\statistics.rtf' style=Mystyles bodytitle;

proc report data=rtf nowd
    style(report)={font_size=10.5pt pretext="QQ群(&_qunname.)成员发言次数及频率统计分析表" posttext="Author:liyongzhao,Created Date:2013-9-12." just=left}
    style(column)={font=('times new roman',12pt)};

    column name frequency rate;

    define name      / 'QQ群成员'    center style(column)={cellwidth=15%};
    define frequency / '发言次数'    center style(column)={cellwidth=5%};
    define rate      / '发言频率(%)' center style(column)={cellwidth=5%} format=6.3;
run;

ods rtf close;

3、程序运行结束后,打开Y盘下的statistics.rtf即可查看结果。

结果类似下图(截取开头部分,隐去QQ群名称和个人QQ号码):