nasdaqhe's blog

被生活强jian着
  博客园  :: 首页  :: 新随笔  :: 联系 :: 订阅 订阅  :: 管理

首先必须分析referer url中哪部分是关键字,哪部分是页数。例如在hao123输入关键字"我们",在搜索结果中的第7页访问你的网站,那么来源的url就是:http://www.baidu.com/s?lm=0&si=&rn=10&tn=sitehao123&ie=gb2312&ct=0&wd=%CE%D2%C3%C7&pn=60&cl=3
上面的url中,wd=%CE%D2%C3%C7就是关键字部分,其中%CE%D2%C3%C7是"我们"两字通过urlencode得到的:CED2是"我"字的GB编码,C3C7是"们"字的GB编码,只要使用urlDecode我们就可以得到原来用户输入了什么关键字访问了自己的网站。而pn=60则是你网站所在的页数,百度是每页10条记录,因此pn=0就是第一页,pn=60是第7页。若url中没有pn部分,则表示是第一页。一般来说,百度对关键字是使用GB编码(如"我们"二字编码成%CE%D2%C3%C7),除非url中含有ie=utf8,那么将采用utf8对关键字编码("我们"二字将编码成%E6%88%91%E4%BB%AC)。相对于百度,google则默认采用utf8对关键字编码,那么就需要将utf8编码转为unicode再通过chrw还原为原来的文字。

以下列出各个搜索引擎的关键字和页数的String表示

名称 关键字String  页数String
www.baidu wd/word pn
image.baidu word pn
cache.baidu word pn
tom  word pn
sou.china query pgNum
sogou query page
114.vnet kw  start 
seek.3721 name/p page
sj53  wd pn
iask k p

------------------------------------上面为多数采用gb对关键字进行编码,下面为多数采用utf8对关键字进行编码

yahoo p b
google q start
baidugoo Keyword pn
search.msn q first

以上只是大致的结果,其实还有很多细致的地方,需要大家自己去发现,我不多说了。

下面是原程序

 

<%
' Reduces a full User-Agent string to a short browser label.
'
' Parameters:
'   agentString - the raw User-Agent header value.
' Returns:
'   "Opera"  when the string contains "Opera" (tested last, so it wins even
'            when "MSIE" is also present);
'   the 8 characters starting at "MSIE" (for example "MSIE 6.0") when the
'            string contains "MSIE";
'   "Other"  in every other case.
' InStr is called without a compare argument, so matching is case-sensitive.
Function shortAgent(agentString)
    Dim msiePos

    shortAgent = "Other"

    msiePos = InStr(agentString, "MSIE")
    If msiePos >= 1 Then
        shortAgent = Mid(agentString, msiePos, 8)
    End If

    If InStr(agentString, "Opera") >= 1 Then
        shortAgent = "Opera"
    End If
End Function
%>
<%
' Decodes a URL-encoded string whose non-ASCII characters are double-byte
' (GB-style) sequences, as produced by search engines that do not use UTF-8.
'
' Parameters:
'   enStr - the URL-encoded text (for example "%CE%D2%C3%C7").
' Returns:
'   The decoded text. "+" becomes a space; "%XX" with a value below 128
'   becomes the single character Chr(XX); "%XX" with a value of 128 or more
'   is treated as the lead byte of a two-byte character whose second byte is
'   either another "%YY" escape or the next literal character.
'
' Malformed or truncated escapes are copied through unchanged instead of
' raising a run-time error.
'
' NOTE(review): Eval is applied to text taken from the referrer. It is only
' reached after isvalidhex has accepted the escape; keep that guard in place
' if this function is modified.
Function URLDecode(enStr)
    Dim deStr
    Dim c, i, v

    deStr = ""
    ' The loop counter is advanced inside the body to skip consumed characters.
    For i = 1 To Len(enStr)
        c = Mid(enStr, i, 1)
        If c = "%" Then
            If Not isvalidhex(Mid(enStr, i, 3)) Then
                ' Not a complete "%XX" escape: keep the "%" literally.
                deStr = deStr & c
            Else
                v = Eval("&h" + Mid(enStr, i + 1, 2))
                If v < 128 Then
                    ' Single-byte (ASCII) escape.
                    deStr = deStr & Chr(v)
                    i = i + 2
                ElseIf isvalidhex(Mid(enStr, i + 3, 3)) Then
                    ' Lead byte followed by an escaped trail byte: "%XX%YY".
                    v = Eval("&h" + Mid(enStr, i + 1, 2) + Mid(enStr, i + 4, 2))
                    deStr = deStr & Chr(v)
                    i = i + 5
                ElseIf i + 3 <= Len(enStr) Then
                    ' Lead byte followed by a literal character that serves as
                    ' the trail byte: use that character's code as "YY".
                    v = Eval("&h" + Mid(enStr, i + 1, 2) + CStr(Hex(Asc(Mid(enStr, i + 3, 1)))))
                    deStr = deStr & Chr(v)
                    i = i + 3
                Else
                    ' Lead byte at the very end of the input with no trail
                    ' byte: pass the escape through untouched.
                    deStr = deStr & Mid(enStr, i, 3)
                    i = i + 2
                End If
            End If
        ElseIf c = "+" Then
            deStr = deStr & " "
        Else
            deStr = deStr & c
        End If
    Next

    URLDecode = deStr
End Function

' Tests whether a string is exactly one URL escape of the form "%XX",
' where each X is a hexadecimal digit (0-9, A-F, case-insensitive).
'
' Parameters:
'   str - the candidate text; it is not modified.
' Returns:
'   True when str is three characters long, starts with "%" and the other
'   two characters are hexadecimal digits; False otherwise.
Function isvalidhex(str)
    Dim candidate, pos

    isvalidhex = False

    ' Work on a copy: arguments are passed by reference by default, so
    ' assigning to str would change the caller's variable.
    candidate = UCase(str)
    If Len(candidate) <> 3 Then Exit Function
    If Left(candidate, 1) <> "%" Then Exit Function

    For pos = 2 To 3
        If InStr("0123456789ABCDEF", Mid(candidate, pos, 1)) = 0 Then Exit Function
    Next

    isvalidhex = True
End Function

' Extracts the raw (still URL-encoded) value of one query-string parameter.
'
' Parameters:
'   URLString - the query string without the leading "?", with pairs
'               separated by "&" (for example "lm=0&wd=%CE%D2&pn=60").
'   kwString  - the parameter name to look for (for example "wd").
'               The name is matched case-sensitively and is not modified.
' Returns:
'   The text after "name=" of the matching pair, or "" when no pair matches.
'   When the name occurs more than once, the last occurrence wins.
Function getKw(URLString, kwString)
    Dim prefix, pairs, idx

    getKw = ""

    ' Build "name=" in a local: arguments are by reference by default, so
    ' appending to kwString would change the caller's variable.
    prefix = kwString & "="
    pairs = Split(URLString, "&")

    For idx = 0 To UBound(pairs)
        If Left(pairs(idx), Len(prefix)) = prefix Then
            getKw = Mid(pairs(idx), Len(prefix) + 1)
        End If
    Next
End Function

' Extracts the numeric value of a paging parameter from a query string.
'
' Parameters:
'   URLString - the query string without the leading "?", with pairs
'               separated by "&".
'   pnString  - the name of the paging parameter (for example "pn" or
'               "start"). Matched case-sensitively; not modified.
' Returns:
'   The parameter value as a number, or 0 when the parameter is absent,
'   empty or not numeric. When the name occurs more than once, the last
'   occurrence wins.
Function getPn(URLString, pnString)
    Dim prefix, pairs, idx, rawValue

    getPn = 0

    ' Build "name=" in a local so the caller's variable is left untouched.
    prefix = pnString & "="
    pairs = Split(URLString, "&")

    For idx = 0 To UBound(pairs)
        If Left(pairs(idx), Len(prefix)) = prefix Then
            rawValue = Mid(pairs(idx), Len(prefix) + 1)
            ' The value comes from a referrer URL, so it may be empty or
            ' garbage; only numeric text is turned into a number.
            If IsNumeric(rawValue) Then
                getPn = CDbl(rawValue)
            Else
                getPn = 0
            End If
        End If
    Next
End Function

' Converts a search engine's paging value into a 1-based page number.
'
' Parameters:
'   pn_tmp  - the raw paging value taken from the URL (number or numeric
'             text); anything non-numeric is treated as 0. Not modified.
'   pn_type - 0 when pn_tmp already is a page number;
'             any other value when pn_tmp is a result offset.
'   pn_beg  - for offsets only: 0 when the first result has offset 0,
'             1 when the first result has offset 1.
'   pn_base - for offsets only: number of results per page.
' Returns:
'   The page number, starting at 1. When pn_type is not 0 and pn_beg is
'   neither 0 nor 1, nothing is assigned and the result is Empty.
Function calculatePn(pn_tmp, pn_type, pn_beg, pn_base)
    Dim offset

    ' Work on a numeric local copy so the caller's variable is not changed
    ' and text values such as "60" take part in arithmetic as numbers.
    If IsNumeric(pn_tmp) Then
        offset = CDbl(pn_tmp)
    Else
        offset = 0
    End If

    If pn_type = 0 Then
        ' A missing page parameter (0) means the first page.
        If offset = 0 Then offset = 1
        calculatePn = offset
    ElseIf pn_beg = 0 Then
        ' 0-based offset: offsets 0..base-1 are page 1.
        If offset < 0 Then offset = 0
        calculatePn = Int(offset / pn_base) + 1
    ElseIf pn_beg = 1 Then
        ' 1-based offset: offsets 1..base are page 1. A missing parameter
        ' arrives as 0 and is treated as the first result.
        If offset < 1 Then offset = 1
        calculatePn = Int((offset - 1) / pn_base) + 1
    End If
End Function
%>
<%
' Decodes a URL-encoded string whose non-ASCII characters are UTF-8
' sequences (as sent by search engines that encode keywords in UTF-8).
'
' Parameters:
'   UTFStr - the URL-encoded text (for example "%E6%88%91%E4%BB%AC").
' Returns:
'   The decoded text:
'     "+"                       -> space
'     "%XX" with XX below 80h   -> the single character Chr(XX)
'     two-byte UTF-8 sequence   -> decoded here with ChrW
'     three-byte UTF-8 sequence -> decoded by ConvChinese
'   Anything else, including malformed or truncated escapes, is copied
'   through unchanged.
Function UTF2GB(UTFStr)
    Dim GBStr, pos, total, ch, b1, b2, b3

    GBStr = ""
    total = Len(UTFStr)
    pos = 1

    Do While pos <= total
        ch = Mid(UTFStr, pos, 1)

        If ch = "+" Then
            GBStr = GBStr & " "
            pos = pos + 1
        ElseIf ch = "%" Then
            ' Look at up to three consecutive escapes; -1 means "no valid
            ' %XX escape at that position".
            b1 = utf2gbEscapedByte(UTFStr, pos)
            b2 = utf2gbEscapedByte(UTFStr, pos + 3)
            b3 = utf2gbEscapedByte(UTFStr, pos + 6)

            If b1 >= 0 And b1 < 128 Then
                ' Plain ASCII escape such as %20.
                GBStr = GBStr & Chr(b1)
                pos = pos + 3
            ElseIf b1 >= 194 And b1 <= 223 And b2 >= 128 And b2 <= 191 Then
                ' Two-byte sequence: 110xxxxx 10yyyyyy.
                GBStr = GBStr & ChrW((b1 And 31) * 64 + (b2 And 63))
                pos = pos + 6
            ElseIf b1 >= 224 And b1 <= 239 And b2 >= 128 And b2 <= 191 And b3 >= 128 And b3 <= 191 Then
                ' Three-byte sequence: hand the nine characters "%XX%YY%ZZ"
                ' to ConvChinese.
                GBStr = GBStr & ConvChinese(Mid(UTFStr, pos, 9))
                pos = pos + 9
            Else
                ' Not a sequence this function understands: keep the "%".
                GBStr = GBStr & ch
                pos = pos + 1
            End If
        Else
            GBStr = GBStr & ch
            pos = pos + 1
        End If
    Loop

    UTF2GB = GBStr
End Function

' Helper for UTF2GB: returns the byte value (0-255) of the "%XX" escape that
' starts at position pos of text, or -1 when there is no complete, valid
' escape at that position.
Function utf2gbEscapedByte(text, pos)
    Dim digits, k

    utf2gbEscapedByte = -1
    If Mid(text, pos, 1) <> "%" Then Exit Function

    digits = UCase(Mid(text, pos + 1, 2))
    If Len(digits) <> 2 Then Exit Function
    For k = 1 To 2
        If InStr("0123456789ABCDEF", Mid(digits, k, 1)) = 0 Then Exit Function
    Next

    utf2gbEscapedByte = CLng("&H" & digits)
End Function

' Converts URL-escaped UTF-8 byte sequences into the characters they encode.
'
' Parameters:
'   x - text of the form "%XX%YY%ZZ..." (it must start with "%"; the leading
'       "%" is dropped and the rest is split on "%" into hex byte values).
' Returns:
'   The decoded characters, concatenated.
'
' How it works: every byte is expanded to an 8-character bit string. For a
' lead byte, the position of its first "0" bit tells how many bytes belong
' to the sequence; the marker bits of the lead byte and the first two bits
' of each following byte are stripped, and the remaining bits are joined and
' converted to a hexadecimal code point.
'
' NOTE(review): c2to16 cuts the bit string into groups of four starting from
' the left, so only payloads whose length is a multiple of four (the 16 bits
' of a three-byte sequence) convert to the intended code point. The last
' element of the array is never examined as a lead byte.
Function ConvChinese(x)
    Dim A, i, j, DigS, bits, hexCode

    ConvChinese = ""
    A = Split(Mid(x, 2), "%")

    ' Expand every hex byte to its bit string.
    For i = 0 To UBound(A)
        A(i) = c16to2(A(i))
    Next

    ' i is advanced inside the inner loop as continuation bytes are consumed.
    For i = 0 To UBound(A) - 1
        DigS = InStr(A(i), "0")
        bits = ""

        For j = 1 To DigS - 1
            If j = 1 Then
                ' Lead byte: drop the leading 1 bits and the 0 that ends them.
                A(i) = Right(A(i), Len(A(i)) - DigS)
                bits = bits & A(i)
            Else
                ' Continuation byte: drop its two marker bits.
                i = i + 1
                A(i) = Right(A(i), Len(A(i)) - 2)
                bits = bits & A(i)
            End If
        Next

        ' A byte that contributes no payload bits is skipped; converting an
        ' empty hex string would raise a run-time error.
        If bits <> "" Then
            hexCode = c2to16(bits)
            If Len(hexCode) = 4 Then
                ConvChinese = ConvChinese & ChrW(Int("&H" & hexCode))
            Else
                ConvChinese = ConvChinese & Chr(Int("&H" & hexCode))
            End If
        End If
    Next
End Function

' Converts a binary digit string to a hexadecimal digit string.
'
' Parameters:
'   x - text made of "0" and "1" characters.
' Returns:
'   One hexadecimal digit per group of four characters, taken from the left
'   (a final group shorter than four is converted as it stands); "" for an
'   empty input.
Function c2to16(x)
    Dim pos

    c2to16 = ""
    For pos = 1 To Len(x) Step 4
        c2to16 = c2to16 & Hex(c2to10(Mid(x, pos, 4)))
    Next
End Function

' Converts a binary digit string to its numeric value.
'
' Parameters:
'   x - text made of "0" and "1" characters; the rightmost character is the
'       least significant bit. Characters other than "1" count as 0.
' Returns:
'   The value as a number; 0 for "0" or an empty string.
Function c2to10(x)
    Dim bitCount, k

    c2to10 = 0
    If x = "0" Then Exit Function

    bitCount = Len(x)
    For k = 0 To bitCount - 1
        ' Walk from the right-hand end; position k has weight 2^k.
        If Mid(x, bitCount - k, 1) = "1" Then c2to10 = c2to10 + 2 ^ k
    Next
End Function

' Converts a hexadecimal digit string to a binary digit string.
'
' Parameters:
'   x - text made of hexadecimal digits; surrounding spaces are ignored.
' Returns:
'   Four binary digits per hexadecimal digit (for example "E6" gives
'   "11100110"); "" for an empty input.
Function c16to2(x)
    Dim hexText, pos, nibbleBits

    c16to2 = ""
    ' Trim once and index the trimmed text, so the loop bound and the
    ' characters being read always refer to the same string.
    hexText = Trim(x)

    For pos = 1 To Len(hexText)
        nibbleBits = c10to2(CInt(Int("&h" & Mid(hexText, pos, 1))))
        ' c10to2 drops leading zeros; pad back to a full four bits.
        Do While Len(nibbleBits) < 4
            nibbleBits = "0" & nibbleBits
        Loop
        c16to2 = c16to2 & nibbleBits
    Next
End Function

' Converts a number to a binary digit string.
'
' Parameters:
'   x - the number to convert; it is not modified.
' Returns:
'   The binary digits of the absolute value without leading zeros ("0" for
'   zero), prefixed with "-" when x is negative.
Function c10to2(x)
    Dim mysign, magnitude, DigS, remaining, k

    c10to2 = ""
    mysign = Sgn(x)
    ' Use a local for the absolute value: arguments are by reference by
    ' default, so assigning to x would change the caller's variable.
    magnitude = Abs(x)

    ' Find the number of binary digits needed: the smallest DigS with
    ' magnitude < 2^DigS.
    DigS = 1
    Do While magnitude >= 2 ^ DigS
        DigS = DigS + 1
    Loop

    ' Emit digits from the most significant bit down.
    remaining = magnitude
    For k = DigS To 1 Step -1
        If remaining >= 2 ^ (k - 1) Then
            remaining = remaining - 2 ^ (k - 1)
            c10to2 = c10to2 & "1"
        Else
            c10to2 = c10to2 & "0"
        End If
    Next

    If mysign = -1 Then c10to2 = "-" & c10to2
End Function
%>
<%
' Works out which search keyword and which result page led a visitor here,
' from the referrer URL of a supported search engine.
'
' Parameters:
'   URLStringAll - the complete referrer URL.
' Results:
'   The function itself returns nothing. It stores the decoded keyword in
'   kw and the 1-based result page in pn. Neither variable is declared in
'   this function - presumably they are script-level variables of the
'   including page; confirm against the caller. URLString and URLHead are
'   likewise left undeclared so that any existing use of them outside this
'   function keeps working.
'   When the host part matches none of the engines below, kw and pn are
'   left untouched. For cache.baidu.com only kw is set.
'
' The part before the first "?" is matched against the known engines, in
' the order written below; the part after it is searched for the engine's
' keyword and paging parameters. A referrer without "?" is handled as
' having an empty query string.
Function resolveRef(URLStringAll)
    Dim queryPos, rawKw

    queryPos = InStr(URLStringAll, "?")
    If queryPos >= 1 Then
        URLHead = Left(URLStringAll, queryPos - 1)
        URLString = Mid(URLStringAll, queryPos + 1)
    Else
        URLHead = URLStringAll
        URLString = ""
    End If

    If InStr(URLHead, "http://www.baidu.com") >= 1 Or InStr(URLHead, "http://www1.baidu.com") >= 1 Then
        ' Baidu web search: keyword in "wd", falling back to "word".
        rawKw = getKw(URLString, "wd")
        If rawKw = "" Then rawKw = getKw(URLString, "word")
        kw = URLDecode(rawKw)
        pn = calculatePn(getPn(URLString, "pn"), 1, 0, 10)

    ElseIf InStr(URLHead, "http://image.baidu.com") >= 1 Then
        ' Baidu image search: 16 results per page.
        kw = URLDecode(getKw(URLString, "word"))
        pn = calculatePn(getPn(URLString, "pn"), 1, 0, 16)

    ElseIf InStr(URLHead, "http://cache.baidu.com") >= 1 Then
        ' Baidu cache: drop ";" and turn "," into a space in the keyword.
        ' No page number is computed for this source.
        kw = URLDecode(getKw(URLString, "word"))
        kw = Replace(kw, ";", "")
        kw = Replace(kw, ",", " ")

    ElseIf InStr(URLHead, "http://search.tom.com") >= 1 Then
        ' Tom: keyword in "word", falling back to "w".
        rawKw = getKw(URLString, "word")
        If rawKw = "" Then rawKw = getKw(URLString, "w")
        kw = URLDecode(rawKw)
        pn = calculatePn(getPn(URLString, "pn"), 1, 0, 10)

    ElseIf InStr(URLHead, "http://sou.china.com") >= 1 Then
        kw = URLDecode(getKw(URLString, "query"))
        pn = calculatePn(getPn(URLString, "pgNum"), 0, 0, 10)

    ElseIf InStr(URLHead, "sogou.com") >= 1 Then
        kw = URLDecode(getKw(URLString, "query"))
        pn = calculatePn(getPn(URLString, "page"), 0, 0, 10)

    ElseIf InStr(URLHead, "http://search.114.vnet.cn") >= 1 Then
        kw = URLDecode(getKw(URLString, "kw"))
        pn = calculatePn(getPn(URLString, "start"), 1, 0, 10)

    ElseIf InStr(URLHead, "http://seek.3721.com") >= 1 Then
        ' 3721: keyword in "name", falling back to "p" when it decodes to "".
        kw = URLDecode(getKw(URLString, "name"))
        If kw = "" Then kw = URLDecode(getKw(URLString, "p"))
        pn = calculatePn(getPn(URLString, "page"), 0, 0, 10)

    ElseIf InStr(URLHead, "http://search.msn.com.cn") >= 1 Then
        ' MSN: UTF-8 keyword, 1-based result offset in "first".
        kw = UTF2GB(getKw(URLString, "q"))
        pn = calculatePn(getPn(URLString, "first"), 1, 1, 10)

    ElseIf InStr(URLHead, "http://www.sj53.com") >= 1 Then
        kw = URLDecode(getKw(URLString, "wd"))
        pn = calculatePn(getPn(URLString, "pn"), 1, 0, 10)

    ElseIf InStr(URLHead, "iask.com") >= 1 Then
        kw = URLDecode(getKw(URLString, "k"))
        pn = calculatePn(getPn(URLString, "p"), 0, 0, 10)

    ElseIf InStr(URLHead, "http://www.baidugoo.com") >= 1 Then
        kw = UTF2GB(getKw(URLString, "Keyword"))
        pn = calculatePn(getPn(URLString, "pn"), 0, 0, 10)

    ElseIf InStr(URLHead, "yahoo.com") >= 1 Then
        ' Yahoo: keyword in "p", falling back to "keyword"; it is UTF-8
        ' only when the URL says ei=utf-8. Paging is the same either way.
        rawKw = getKw(URLString, "p")
        If rawKw = "" Then rawKw = getKw(URLString, "keyword")
        If InStr(LCase(URLStringAll), "ei=utf-8") >= 1 Then
            kw = UTF2GB(rawKw)
        Else
            kw = URLDecode(rawKw)
        End If
        pn = calculatePn(getPn(URLString, "b"), 1, 1, 10)

    ElseIf InStr(URLHead, "http://www.google") >= 1 Then
        ' Google: matched on the part before "?" like every other engine,
        ' so a Google address inside another site's query string does not
        ' count as a Google referrer.
        kw = UTF2GB(getKw(URLString, "q"))
        pn = calculatePn(getPn(URLString, "start"), 1, 0, 10)
    End If
End Function
%>