asp采集列表demon
<%On Error Resume Next
Server.ScriptTimeOut=9999999
dim url,html,content,myurl,mytitle,tempstr
conn.execute "delete * from tuijian"
response.write "清除原有数据成功!下面进行数据采集:</br>"
url="****"
Html=GetURL(url) '把地址url传递到GetURL这个函数
Html=Bytes2BStr(Html) '二进制转换成字符一下
Set xiaoqi = New Regexp
xiaoqi.IgnoreCase = True
xiaoqi.Global = True
xiaoqi.Pattern ="target=""_blank"">(.*)</a></td>"
Set Matches =xiaoqi.Execute(Html)
set xiaoqi=nothing
dim i,sosort
i=0
For Each Match in Matches
i=i+1
if(i<=18) then
mytitle=Match.SubMatches(0)
myurl="http://www.sogou.com/sogou?query="
myurl=myurl+Match.SubMatches(0)
myurl=myurl+"&pid=sogou-site-b427426b8acd2c2e"
if(i<=6) then sosort=1 end if
if(i>6 and i<=12) then sosort=2 end if
if(i>12 and i<=18) then sosort=3 end if
'以下是入库
set rss=server.createobject("adodb.recordset")
rss.open "select * from tuijian",conn,1,3
rss.addnew
rss("title")=mytitle
rss("url")=myurl
rss("sosort")=sosort
rss("px")=i
rss("color")=""
rss("Location")=""
rss("space")=12
rss.update
rss.close
set rss=nothing
tempstr=mytitle+"---"+myurl+"入库成功<br/>"
response.write tempstr
end if
Next
Response.write "</br><font color=red>采集完毕,共18条信息,请更新首页静态页面即可!</font>"
Response.End
' 获取远程HTML
Function GetURL(url)
Set Retrieval = CreateObject("Microsoft.XMLHTTP")
With Retrieval
.Open "GET", url, False
.Send
GetURL = .responsebody
if len(.responsebody)<100 then
response.write "获取远程文件 <a href="&url&" target=_blank>"&url&"</a> 失败。"
response.end
end if
End With
Set Retrieval = Nothing
End Function
' 二进制转字符串
function bytes2bstr(vin)
strreturn = ""
for i = 1 to lenb(vin)
thischarcode = ascb(midb(vin,i,1))
if thischarcode < &h80 then
strreturn = strreturn & chr(thischarcode)
else
nextcharcode = ascb(midb(vin,i+1,1))
strreturn = strreturn & chr(clng(thischarcode) * &h100 + cint(nextcharcode))
i = i + 1
end if
next
bytes2bstr = strreturn
end function
%>
浙公网安备 33010602011771号