读取目标网页的HTML,用System.Net.HttpWebRequest简单封装了一个CLASS
1
using System.Web;
2
using System.Net;
3
using System.IO;
4
5
namespace MyProject.Controllers
{
    /// <summary>
    /// Thin wrapper around <see cref="HttpWebRequest"/> that downloads the HTML of a
    /// URL with a GET request. Constructing an instance performs the request
    /// immediately and stores the result in <see cref="Content"/>.
    /// </summary>
    public class WebRequestUtility
    {
        /// <summary>Sentinel placed in <see cref="Content"/> / returned when a download fails.</summary>
        private const string FailedContent = "failUrl";

        /// <summary>Encoding name used when the caller does not supply one.</summary>
        private const string DefaultEncodingName = "gb2312";

        /// <summary>The URL passed to the constructor.</summary>
        public string RequestUrl { get; set; }

        /// <summary>The downloaded response body, or "failUrl" when the download failed.</summary>
        public string Content { get; set; }

        /// <summary>True when the constructor's download succeeded.</summary>
        public bool ifUrlAvailable { get; set; }

        /// <summary>
        /// Stores <paramref name="url"/>, downloads it and records the outcome in
        /// <see cref="Content"/> and <see cref="ifUrlAvailable"/>.
        /// </summary>
        /// <param name="url">Absolute URL to request.</param>
        public WebRequestUtility(string url)
        {
            RequestUrl = url;

            // Track success explicitly instead of comparing Content with the sentinel,
            // so a page whose body happens to be "failUrl" is not reported as unavailable.
            string html;
            ifUrlAvailable = tryReadContent(url, DefaultEncodingName, out html);
            Content = ifUrlAvailable ? html : FailedContent;
        }

        /// <summary>
        /// Builds a GET request for the given URL. Despite its name, this method
        /// returns the request object and does not send anything.
        /// </summary>
        /// <param name="url">Absolute URL to request.</param>
        /// <returns>An unsent <see cref="HttpWebRequest"/> whose method is GET.</returns>
        public HttpWebRequest getHttpResponse(string url)
        {
            HttpWebRequest req = (HttpWebRequest)WebRequest.Create(url);
            req.Method = "GET";
            return req;
        }

        /// <summary>
        /// Downloads the body of <paramref name="url"/> and decodes it as gb2312.
        /// </summary>
        /// <param name="url">Absolute URL to request.</param>
        /// <returns>The response body, or "failUrl" when the request or the read fails.</returns>
        public string getHttpContent(string url)
        {
            return getHttpContent(url, DefaultEncodingName);
        }

        /// <summary>
        /// Downloads the body of <paramref name="url"/> and decodes it with the named encoding.
        /// </summary>
        /// <param name="url">Absolute URL to request.</param>
        /// <param name="encodingName">Encoding name accepted by <c>System.Text.Encoding.GetEncoding</c>, e.g. "utf-8".</param>
        /// <returns>The response body, or "failUrl" when the request or the read fails.</returns>
        public string getHttpContent(string url, string encodingName)
        {
            string html;
            return tryReadContent(url, encodingName, out html) ? html : FailedContent;
        }

        /// <summary>
        /// Sends a GET request to <paramref name="url"/> and reports whether a response was received.
        /// </summary>
        /// <param name="url">Absolute URL to probe.</param>
        /// <returns>True when a response was obtained; false when the request raised a <see cref="WebException"/>.</returns>
        public bool checkUrlAvailability(string url)
        {
            HttpWebRequest req = getHttpResponse(url);
            try
            {
                // Dispose the response so the underlying connection is released.
                using (WebResponse response = req.GetResponse())
                {
                    return true;
                }
            }
            catch (WebException e)
            {
                // Any WebException (protocol error, DNS failure, refused connection,
                // timeout, ...) means the URL could not be fetched.
                logException(e.Message);
                return false;
            }
        }

        /// <summary>
        /// Performs the download shared by the constructor and <c>getHttpContent</c>.
        /// </summary>
        /// <param name="url">Absolute URL to request.</param>
        /// <param name="encodingName">Name of the encoding used to decode the body.</param>
        /// <param name="content">Receives the decoded body on success, otherwise null.</param>
        /// <returns>True when the body was read completely; false on any failure.</returns>
        private bool tryReadContent(string url, string encodingName, out string content)
        {
            // Request creation stays outside the try block, so a malformed URL
            // still throws to the caller rather than being turned into a failure result.
            HttpWebRequest req = getHttpResponse(url);
            try
            {
                using (WebResponse response = req.GetResponse())
                using (Stream body = response.GetResponseStream())
                using (StreamReader reader = new StreamReader(body, System.Text.Encoding.GetEncoding(encodingName)))
                {
                    content = reader.ReadToEnd();
                    return true;
                }
            }
            catch (System.Exception e)
            {
                logException(e.Message);
                content = null;
                return false;
            }
        }

        /// <summary>
        /// Reports an error message through <c>System.Diagnostics.Trace</c>.
        /// </summary>
        /// <param name="e">Error text to report.</param>
        protected void logException(string e)
        {
            System.Diagnostics.Trace.WriteLine(e);
        }
    }
}
92
using System.Web;2
using System.Net;3
using System.IO;4

5
namespace MyProject.Controllers
{
    /// <summary>
    /// Thin wrapper around <see cref="HttpWebRequest"/> that downloads the HTML of a
    /// URL with a GET request. Constructing an instance performs the request
    /// immediately and stores the result in <see cref="Content"/>.
    /// </summary>
    public class WebRequestUtility
    {
        /// <summary>Sentinel placed in <see cref="Content"/> / returned when a download fails.</summary>
        private const string FailedContent = "failUrl";

        /// <summary>Encoding name used when the caller does not supply one.</summary>
        private const string DefaultEncodingName = "gb2312";

        /// <summary>The URL passed to the constructor.</summary>
        public string RequestUrl { get; set; }

        /// <summary>The downloaded response body, or "failUrl" when the download failed.</summary>
        public string Content { get; set; }

        /// <summary>True when the constructor's download succeeded.</summary>
        public bool ifUrlAvailable { get; set; }

        /// <summary>
        /// Stores <paramref name="url"/>, downloads it and records the outcome in
        /// <see cref="Content"/> and <see cref="ifUrlAvailable"/>.
        /// </summary>
        /// <param name="url">Absolute URL to request.</param>
        public WebRequestUtility(string url)
        {
            RequestUrl = url;

            // Track success explicitly instead of comparing Content with the sentinel,
            // so a page whose body happens to be "failUrl" is not reported as unavailable.
            string html;
            ifUrlAvailable = tryReadContent(url, DefaultEncodingName, out html);
            Content = ifUrlAvailable ? html : FailedContent;
        }

        /// <summary>
        /// Builds a GET request for the given URL. Despite its name, this method
        /// returns the request object and does not send anything.
        /// </summary>
        /// <param name="url">Absolute URL to request.</param>
        /// <returns>An unsent <see cref="HttpWebRequest"/> whose method is GET.</returns>
        public HttpWebRequest getHttpResponse(string url)
        {
            HttpWebRequest req = (HttpWebRequest)WebRequest.Create(url);
            req.Method = "GET";
            return req;
        }

        /// <summary>
        /// Downloads the body of <paramref name="url"/> and decodes it as gb2312.
        /// </summary>
        /// <param name="url">Absolute URL to request.</param>
        /// <returns>The response body, or "failUrl" when the request or the read fails.</returns>
        public string getHttpContent(string url)
        {
            return getHttpContent(url, DefaultEncodingName);
        }

        /// <summary>
        /// Downloads the body of <paramref name="url"/> and decodes it with the named encoding.
        /// </summary>
        /// <param name="url">Absolute URL to request.</param>
        /// <param name="encodingName">Encoding name accepted by <c>System.Text.Encoding.GetEncoding</c>, e.g. "utf-8".</param>
        /// <returns>The response body, or "failUrl" when the request or the read fails.</returns>
        public string getHttpContent(string url, string encodingName)
        {
            string html;
            return tryReadContent(url, encodingName, out html) ? html : FailedContent;
        }

        /// <summary>
        /// Sends a GET request to <paramref name="url"/> and reports whether a response was received.
        /// </summary>
        /// <param name="url">Absolute URL to probe.</param>
        /// <returns>True when a response was obtained; false when the request raised a <see cref="WebException"/>.</returns>
        public bool checkUrlAvailability(string url)
        {
            HttpWebRequest req = getHttpResponse(url);
            try
            {
                // Dispose the response so the underlying connection is released.
                using (WebResponse response = req.GetResponse())
                {
                    return true;
                }
            }
            catch (WebException e)
            {
                // Any WebException (protocol error, DNS failure, refused connection,
                // timeout, ...) means the URL could not be fetched.
                logException(e.Message);
                return false;
            }
        }

        /// <summary>
        /// Performs the download shared by the constructor and <c>getHttpContent</c>.
        /// </summary>
        /// <param name="url">Absolute URL to request.</param>
        /// <param name="encodingName">Name of the encoding used to decode the body.</param>
        /// <param name="content">Receives the decoded body on success, otherwise null.</param>
        /// <returns>True when the body was read completely; false on any failure.</returns>
        private bool tryReadContent(string url, string encodingName, out string content)
        {
            // Request creation stays outside the try block, so a malformed URL
            // still throws to the caller rather than being turned into a failure result.
            HttpWebRequest req = getHttpResponse(url);
            try
            {
                using (WebResponse response = req.GetResponse())
                using (Stream body = response.GetResponseStream())
                using (StreamReader reader = new StreamReader(body, System.Text.Encoding.GetEncoding(encodingName)))
                {
                    content = reader.ReadToEnd();
                    return true;
                }
            }
            catch (System.Exception e)
            {
                logException(e.Message);
                content = null;
                return false;
            }
        }

        /// <summary>
        /// Reports an error message through <c>System.Diagnostics.Trace</c>.
        /// </summary>
        /// <param name="e">Error text to report.</param>
        protected void logException(string e)
        {
            System.Diagnostics.Trace.WriteLine(e);
        }
    }
}

实例化这个类时,把URL地址传给构造函数,构造函数会立即发出HTTP请求并读取响应。实例的Content属性便是响应返回的HTML代码;如果请求失败,Content的值为"failUrl"。
本人在公司里实习之初做的一些事情就是网页信息抓取,这个class用的比较多,自己用用还是能正常运行没有碰到什么问题。
希望园子里的朋友们看到后能帮忙完善这个class,或者对一些没有考虑到的地方提些建议,毕竟它的确还是个半成品。
TODO,运用WebClient进行网页图片等类型信息的下载和转储。


浙公网安备 33010602011771号