网上有很多 ASP 版本的百度抓取程序,现贴上 ASP.NET 2.0(C#)版的百度收录数抓取方法,希望各位指正。
1
using System;
2
using System.Collections.Generic;
3
using System.Text;
4
using System.Net;
5
using System.Text.RegularExpressions;
6
namespace Tool
{
    /// <summary>
    /// Queries Baidu with a <c>site:</c> search and extracts the number of
    /// indexed pages reported on the result page.
    /// </summary>
    public class SeoCount
    {
        // Text that immediately precedes the count on the result page.
        private const string CountPrefix = "百度一下,找到相关网页";

        // The prefix plus everything up to (but not including) the "篇,用时" suffix.
        private const string CountPattern = @"百度一下,找到相关网页[\s\S]*?(?=篇,用时)";

        // Marker ("approximately") that Baidu puts in front of large counts.
        private const string ApproximateMarker = "约";

        /// <summary>
        /// Returns the number of pages Baidu reports as indexed for a host
        /// within a given period.
        /// </summary>
        /// <param name="strTimespan">
        /// Value sent as the <c>lm</c> query parameter:
        /// "1" = yesterday, "7" = last week, "30" = last month,
        /// "360" = last year, "0" = all dates.
        /// </param>
        /// <param name="strHostName">Host name searched for with <c>site:</c>.</param>
        /// <returns>
        /// The count text found on the page with the "约" marker removed
        /// (thousands separators, if any, are kept), or "0" when the page
        /// contains no count.
        /// </returns>
        /// <exception cref="System.Net.WebException">The page could not be downloaded.</exception>
        public string BaiduCount(string strTimespan, string strHostName)
        {
            // Escape caller-supplied values so characters such as '&' or '#'
            // cannot alter the query string. A null argument is treated as an
            // empty string, as plain concatenation did before.
            string uri = "http://www.baidu.com/s?lm=" + Uri.EscapeDataString(strTimespan ?? string.Empty)
                + "&wd=site:" + Uri.EscapeDataString(strHostName ?? string.Empty);

            string html;
            // WebClient is IDisposable; release it as soon as the download is done.
            // NOTE(review): the text is decoded using the client's Encoding setting;
            // confirm that matches the charset Baidu serves.
            using (WebClient client = new WebClient())
            {
                html = client.DownloadString(uri);
            }

            return ParseCount(html);
        }

        /// <summary>
        /// Extracts the indexed-page count from the HTML of a Baidu result page.
        /// </summary>
        /// <param name="html">Result page markup; may be null or empty.</param>
        /// <returns>The count text without the "约" marker, or "0" when none is found.</returns>
        public static string ParseCount(string html)
        {
            if (string.IsNullOrEmpty(html))
            {
                return "0";
            }

            string matched = Regex.Match(html, CountPattern, RegexOptions.IgnoreCase).Value;

            // string.Replace returns a new string; the result must be kept,
            // otherwise the "约" marker stays in the returned value.
            string count = matched.Replace(CountPrefix, "").Replace(ApproximateMarker, "");

            // No match (or nothing between prefix and suffix) means no pages indexed.
            if (count == "")
            {
                count = "0";
            }

            return count;
        }
    }
}
using System;2
using System.Collections.Generic;3
using System.Text;4
using System.Net;5
using System.Text.RegularExpressions;6
namespace Tool
{
    /// <summary>
    /// Queries Baidu with a <c>site:</c> search and extracts the number of
    /// indexed pages reported on the result page.
    /// </summary>
    public class SeoCount
    {
        // Text that immediately precedes the count on the result page.
        private const string CountPrefix = "百度一下,找到相关网页";

        // The prefix plus everything up to (but not including) the "篇,用时" suffix.
        private const string CountPattern = @"百度一下,找到相关网页[\s\S]*?(?=篇,用时)";

        // Marker ("approximately") that Baidu puts in front of large counts.
        private const string ApproximateMarker = "约";

        /// <summary>
        /// Returns the number of pages Baidu reports as indexed for a host
        /// within a given period.
        /// </summary>
        /// <param name="strTimespan">
        /// Value sent as the <c>lm</c> query parameter:
        /// "1" = yesterday, "7" = last week, "30" = last month,
        /// "360" = last year, "0" = all dates.
        /// </param>
        /// <param name="strHostName">Host name searched for with <c>site:</c>.</param>
        /// <returns>
        /// The count text found on the page with the "约" marker removed
        /// (thousands separators, if any, are kept), or "0" when the page
        /// contains no count.
        /// </returns>
        /// <exception cref="System.Net.WebException">The page could not be downloaded.</exception>
        public string BaiduCount(string strTimespan, string strHostName)
        {
            // Escape caller-supplied values so characters such as '&' or '#'
            // cannot alter the query string. A null argument is treated as an
            // empty string, as plain concatenation did before.
            string uri = "http://www.baidu.com/s?lm=" + Uri.EscapeDataString(strTimespan ?? string.Empty)
                + "&wd=site:" + Uri.EscapeDataString(strHostName ?? string.Empty);

            string html;
            // WebClient is IDisposable; release it as soon as the download is done.
            // NOTE(review): the text is decoded using the client's Encoding setting;
            // confirm that matches the charset Baidu serves.
            using (WebClient client = new WebClient())
            {
                html = client.DownloadString(uri);
            }

            return ParseCount(html);
        }

        /// <summary>
        /// Extracts the indexed-page count from the HTML of a Baidu result page.
        /// </summary>
        /// <param name="html">Result page markup; may be null or empty.</param>
        /// <returns>The count text without the "约" marker, or "0" when none is found.</returns>
        public static string ParseCount(string html)
        {
            if (string.IsNullOrEmpty(html))
            {
                return "0";
            }

            string matched = Regex.Match(html, CountPattern, RegexOptions.IgnoreCase).Value;

            // string.Replace returns a new string; the result must be kept,
            // otherwise the "约" marker stays in the returned value.
            string count = matched.Replace(CountPrefix, "").Replace(ApproximateMarker, "");

            // No match (or nothing between prefix and suffix) means no pages indexed.
            if (count == "")
            {
                count = "0";
            }

            return count;
        }
    }
}


浙公网安备 33010602011771号