利用WebClient和WebRequest类获得网页源代码
关键是ValidateRequest="false",要不然会说request.form,报错
<%@ Page Language="C#" AutoEventWireup="true" CodeFile="Default3.aspx.cs" Inherits="test_Default3" ValidateRequest="false"%>

<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">

<html xmlns="http://www.w3.org/1999/xhtml" >
<head runat="server">
<title>无标题页</title>
</head>
<body>
<form id="form1" runat="server">
<div align="center" style="FONT-WEIGHT: bold">得到任意网页源代码</div>
<asp:TextBox id="UrlText" runat="server" Width="400px">http://boco.com.cn
</asp:TextBox>
<asp:Button id="WebClientButton" Runat="server" Text="用WebClient得到" OnClick="WebClientButton_Click"></asp:Button>
<asp:Button id="WebRequestButton" runat="server" Text="用WebRequest得到" OnClick="WebRequestButton_Click"></asp:Button>
<asp:Button ID="Button1" runat="server" OnClick="GetText_Click" Text="Button" /><br>
<asp:TextBox id="ContentHtml" runat="server" Width="100%" Height="360px" TextMode="MultiLine">
</asp:TextBox>

</form>
</body>
</html>
private string PageUrl = "";

protected void WebClientButton_Click(object sender, System.EventArgs e)
{
PageUrl = UrlText.Text;
WebClient wc = new WebClient();
wc.Credentials = CredentialCache.DefaultCredentials;

///方法一:
Byte[] pageData = wc.DownloadData(PageUrl);
ContentHtml.Text = Encoding.Default.GetString(pageData);


/// 方法二:
/// ***************代码开始**********
/// Stream resStream = wc.OpenRead(PageUrl);
/// StreamReader sr = new StreamReader(resStream,System.Text.Encoding.Default);
/// ContentHtml.Text = sr.ReadToEnd();
/// resStream.Close();
/// **************代码结束********
///
wc.Dispose();
}

protected void WebRequestButton_Click(object sender, System.EventArgs e)
{
PageUrl = UrlText.Text;
WebRequest request = WebRequest.Create(PageUrl);
WebResponse response = request.GetResponse();
Stream resStream = response.GetResponseStream();
StreamReader sr = new StreamReader(resStream, System.Text.Encoding.Default);
ContentHtml.Text = sr.ReadToEnd();
resStream.Close();
sr.Close();
}

protected void GetText_Click(object sender, System.EventArgs e)
{
PageUrl = UrlText.Text;
WebRequest request = WebRequest.Create(PageUrl);
WebResponse response = request.GetResponse();
Stream resStream = response.GetResponseStream();
StreamReader sr = new StreamReader(resStream, System.Text.Encoding.Default);
ContentHtml.Text = sr.ReadToEnd();
resStream.Close();
sr.Close();
ContentHtml.Text = Regex.Replace(ContentHtml.Text, "<[^>]*>", "");
//替换空格
ContentHtml.Text = Regex.Replace(ContentHtml.Text, "\\s+", " ");
}
<%@ Page Language="C#" AutoEventWireup="true" CodeFile="Default3.aspx.cs" Inherits="test_Default3" ValidateRequest="false"%>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" >
<head runat="server">
<title>无标题页</title>
</head>
<body>
<form id="form1" runat="server">
<div align="center" style="FONT-WEIGHT: bold">得到任意网页源代码</div>
<asp:TextBox id="UrlText" runat="server" Width="400px">http://boco.com.cn
</asp:TextBox>
<asp:Button id="WebClientButton" Runat="server" Text="用WebClient得到" OnClick="WebClientButton_Click"></asp:Button>
<asp:Button id="WebRequestButton" runat="server" Text="用WebRequest得到" OnClick="WebRequestButton_Click"></asp:Button>
<asp:Button ID="Button1" runat="server" OnClick="GetText_Click" Text="Button" /><br>
<asp:TextBox id="ContentHtml" runat="server" Width="100%" Height="360px" TextMode="MultiLine">
</asp:TextBox>
</form>
</body>
</html>
private string PageUrl = "";
protected void WebClientButton_Click(object sender, System.EventArgs e)
{
PageUrl = UrlText.Text;
WebClient wc = new WebClient();
wc.Credentials = CredentialCache.DefaultCredentials;
///方法一:
Byte[] pageData = wc.DownloadData(PageUrl);
ContentHtml.Text = Encoding.Default.GetString(pageData);

/// 方法二:
/// ***************代码开始**********
/// Stream resStream = wc.OpenRead(PageUrl);
/// StreamReader sr = new StreamReader(resStream,System.Text.Encoding.Default);
/// ContentHtml.Text = sr.ReadToEnd();
/// resStream.Close();
/// **************代码结束********
///
wc.Dispose();
}
protected void WebRequestButton_Click(object sender, System.EventArgs e)
{
PageUrl = UrlText.Text;
WebRequest request = WebRequest.Create(PageUrl);
WebResponse response = request.GetResponse();
Stream resStream = response.GetResponseStream();
StreamReader sr = new StreamReader(resStream, System.Text.Encoding.Default);
ContentHtml.Text = sr.ReadToEnd();
resStream.Close();
sr.Close();
}
protected void GetText_Click(object sender, System.EventArgs e)
{
PageUrl = UrlText.Text;
WebRequest request = WebRequest.Create(PageUrl);
WebResponse response = request.GetResponse();
Stream resStream = response.GetResponseStream();
StreamReader sr = new StreamReader(resStream, System.Text.Encoding.Default);
ContentHtml.Text = sr.ReadToEnd();
resStream.Close();
sr.Close();
ContentHtml.Text = Regex.Replace(ContentHtml.Text, "<[^>]*>", "");
//替换空格
ContentHtml.Text = Regex.Replace(ContentHtml.Text, "\\s+", " ");
}


浙公网安备 33010602011771号