技术宅,fat-man

加深对语言本身的了解，可以避免写出愚蠢的代码

导航

下面是重写之后的脚本，希望还算过得去。

#!/bin/sh


###############################################
#                                             #
#  author:lishujun                            #
#  date:2013-4-17                             #
#  use:$./testread.sh 20130425                #
#                                             #
###############################################

### load Library ###
# Source the shared helper file; presumably it defines common_init_stat_date
# (that function is not defined anywhere in this script).
. common

# Earlier initialisation calls, kept for reference:
#init_date $1
#init_db

# Hand the first script argument (a date such as 20130425, per the usage
# header) to the helper, then copy the $statDate value it is presumed to set.
common_init_stat_date $1
stat_date=${statDate}

### define global variable ###

# Glob pattern for the access logs of the day; the trailing * is only
# expanded where the variable is later used unquoted.
logFile="./logs/${stat_date}/wap_access_log.${stat_date}*"
# File that receives the extracted records.
dataFile="./data/access_detail_${stat_date}_small"
# Comma-separated list of domain prefixes whose requests are kept.
domain='pp.cn,sou.pp.cn,c0.pp.cn,c1.pp.cn,c2.pp.cn,c3.pp.cn'

# Alternative domain settings, kept for reference:
#domain=$2
#domain='shuqi.com,pp.cn,shuqiread.com'

# Comma-separated query parameter names handed to the awk program.
ucFlags='fr,ct,ac'

### functions ###

# cleanDataFile: delete the output file left behind by a previous run, so
# that makeDataFile (which appends to it) starts from an empty file.
# Globals read: dataFile - path of the file to delete.
# Returns: the exit status of rm; a file that does not exist is not an error.
cleanDataFile()
{
        echo clean data files...
        # -f : a missing file (e.g. on the very first run) is not a failure
        # -- : a path beginning with "-" is not mistaken for an option
        # "" : the path is never word-split or glob-expanded
        rm -f -- "$dataFile"
}


# makeDataFile: stream the access logs through the log reader and append one
# de-duplicated record per line to $dataFile.
#
# Globals read:
#   logFile  - glob pattern of the log files handed to the log reader
#   domain   - comma-separated URL prefixes; only matching requests are kept
#   ucFlags  - comma-separated query parameter names to extract, in output order
#   dataFile - output file (opened in append mode by awk)
#   kcatCmd  - optional override of the log reader command;
#              defaults to /www/scripts/stat/kcat.sh
#
# Input fields used (whitespace separated, wrapped in [ ]):
#   $4 = user id, $6 = url, $9 = session; "[FAD:<digits>]" anywhere in the line.
#
# Output record (space separated):
#   userid domain path <one value per ucFlags entry> fad
# Any piece that cannot be extracted is written as "-".
#
# Returns: the exit status of awk (last command of the pipeline).
makeDataFile()
{
        # $logFile is deliberately left unquoted: it holds a glob pattern that
        # has to be expanded into the list of log files right here.
        "${kcatCmd:-/www/scripts/stat/kcat.sh}" $logFile | awk \
                -v domain="$domain" \
                -v ucFlags="$ucFlags" \
                -v dataFile="$dataFile" \
        '
        # Strip the square brackets that wrap a log field.
        function extractField(s)
        {
                gsub(/\[/,"",s)
                gsub(/\]/,"",s)
                return s
        }

        # Return the digits of the first "[FAD:<digits>]" marker, or "-".
        function extractFad(record)
        {
                if(match(record,/\[FAD:[0-9]+\]/) > 0)
                {
                        # skip the 5 characters of "[FAD:" and drop the closing "]"
                        return substr(record,RSTART+5,RLENGTH-6)
                }
                return "-"
        }

        # Return the value of query parameter "name" in url, or "-" if absent.
        # The value runs up to the next "&" or to the end of the url.
        function queryParam(url,name,    skip)
        {
                # the match starts with one of "&" or "?", then name, then "="
                skip = length(name) + 2
                if(match(url,"[&?]" name "=[^&]+") > 0)
                {
                        return substr(url,RSTART+skip,RLENGTH-skip)
                }
                return "-"
        }

        # Fill params with "domain", "path" and one entry per requested flag.
        function parseUrl(url,params,    k)
        {
                # domain: everything before the first ":" or "/"
                params["domain"] = "-"
                if(match(url,/[:\/]/) > 0)
                {
                        params["domain"] = substr(url,1,RSTART-1)
                }

                # path: from the first "/" up to the "?" or the end of the url.
                # (A "$" inside a bracket expression is a literal dollar sign,
                # not an end anchor, so it cannot be used to mean end of url.)
                params["path"] = "-"
                if(match(url,/\/[^?]*/) > 0)
                {
                        params["path"] = substr(url,RSTART,RLENGTH)
                }

                for(k = 1; k <= flagCount; k++)
                {
                        params[flagList[k]] = queryParam(url,flagList[k])
                }
        }

        BEGIN{
                domainCount = split(domain,domainList,",")
                flagCount = split(ucFlags,flagList,",")
        }

        {
                userid = extractField($4)
                url = extractField($6)
                session = extractField($9)
                fad = extractFad($0)

                # anonymous requests (no positive numeric id) are keyed by session
                if(int(userid) < 1)
                {
                        userid = session
                }

                for(i = 1; i <= domainCount; i++)
                {
                        # awk strings are 1-indexed: a start of 0 makes several
                        # implementations (e.g. gawk) return one character fewer
                        # than requested, so the prefix would never match.
                        if(substr(url,1,length(domainList[i])) == domainList[i])
                        {
                                parseUrl(url,urlParams)
                                line = userid" "urlParams["domain"]" "urlParams["path"]
                                for(k = 1; k <= flagCount; k++)
                                {
                                        line = line" "urlParams[flagList[k]]
                                }
                                line = line" "fad

                                # write each distinct record only once
                                if(!(line in seen))
                                {
                                        seen[line] = 1
                                        print line >> dataFile
                                }
                                break
                        }
                }
        }
        '
}


# main: remove the previous output file, then rebuild it from the logs.
# Prints a timestamped "start..." line first and a timestamped "done" line
# only when makeDataFile succeeded.
# Returns: 0 on success, 1 when makeDataFile failed.
main()
{
        echo "$(date) start..."
        # The result of the cleanup is deliberately not checked: having no
        # old file to remove must not stop the run.
        cleanDataFile
        if ! makeDataFile; then
                echo "$(date) makeDataFile failed" >&2
                return 1
        fi
        echo "$(date) done"
}

# Script entry point: every function is defined above, so run main now.
main

 

 

posted on 2013-04-30 19:18  codestyle  阅读(206)  评论(0)    收藏  举报