利用 .NET 框架提供的 WebClient 类和 WebRequest 类，可以很容易地获取给定 URL 地址的网页源代码。以下是一个完整的 C# 示例。

查看例子
http://dotnet.aspx.cc/Exam/GetPageHtml.aspx

GetPageHtml.aspx
<%@ Page language="c#" validateRequest = "false" Codebehind="GetPageHtml.aspx.cs" 
 AutoEventWireup
="false" Inherits="eMeng.Exam.GetPageHtml" 
%>
<%--
  Demo page: the user enters a URL, clicks one of the two buttons, and the text
  fetched from that URL is shown in the multi-line ContentHtml box.
  The page directive sets validateRequest="false" (request validation is off) and
  AutoEventWireup="false" (handlers are not bound by name; they must be wired in
  the code-behind class eMeng.Exam.GetPageHtml).
  NOTE(review): validateRequest is presumably disabled so that fetched HTML held in
  ContentHtml can be posted back without a validation error - confirm.
--%>
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" >
<HTML>
  
<HEAD>
    
<title>得到网页源代码</title>
    
<meta name="GENERATOR" Content="Microsoft Visual Studio 7.0">
    
<meta name="CODE_LANGUAGE" Content="C#">
    
<meta name="vs_defaultClientScript" content="JavaScript">
    
<meta name="vs_targetSchema" content="http://schemas.microsoft.com/intellisense/ie5">
  
</HEAD>
  
<body MS_POSITIONING="GridLayout">
    
<form id="aspNetBuffer" method="post" runat="server">
      
<div align="center" style="FONT-WEIGHT: bold">得到任意网页源代码</div>
      
<%-- URL to fetch; pre-filled with a sample address. --%>
<asp:TextBox id="UrlText" runat="server" Width="400px">http://dotnet.aspx.cc/content.aspx
          
</asp:TextBox>
      
<%-- Fetch via WebClient; the code-behind defines WebClientButton_Click for this button. --%>
<asp:Button id="WebClientButton" Runat="server" Text="用WebClient得到"></asp:Button>
      
<%-- Fetch via WebRequest; the code-behind defines WebRequestButton_Click for this button. --%>
<asp:Button id="WebRequestButton" runat="server" Text="用WebRequest得到"></asp:Button>
      
<br>
      
<%-- Multi-line output box that the code-behind fills with the downloaded text. --%>
<asp:TextBox id="ContentHtml" runat="server" Width="100%" Height="360px" TextMode="MultiLine">
          
</asp:TextBox>
    
</form>
  
</body>
</HTML>

GetPageHtml.aspx.cs
using System;
using System.Collections;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Web;
using System.Web.SessionState;
using System.Web.UI;
using System.Web.UI.WebControls;
using System.Web.UI.HtmlControls;
using System.IO;
using System.Net;
using System.Text;
using System.Text.RegularExpressions;

namespace eMeng.Exam
{
    
/// <summary>
    
/// GetPageHtml 的摘要说明。
    
/// </summary>

    public class GetPageHtml : System.Web.UI.Page
    
{
        
// Server control with id "WebClientButton" in GetPageHtml.aspx.
protected System.Web.UI.WebControls.Button WebClientButton;
        
// Server control with id "WebRequestButton" in GetPageHtml.aspx.
protected System.Web.UI.WebControls.Button WebRequestButton;
        
// Multi-line text box (id "ContentHtml") that the click handlers fill with the fetched text.
protected System.Web.UI.WebControls.TextBox ContentHtml;
        
// Text box (id "UrlText") holding the URL to fetch.
protected System.Web.UI.WebControls.TextBox UrlText;
        
// NOTE(review): no control with id "GetText" appears in the GetPageHtml.aspx markup shown;
// presumably a leftover from another version of the page - verify before relying on it.
protected System.Web.UI.WebControls.Button GetText;
        
// URL taken from UrlText at the start of each click handler.
private string PageUrl = "";

        
/// <summary>
/// Page load handler. The body is empty: no work is performed here.
/// Presumably wired to the page's Load event by the omitted designer code.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void Page_Load(object sender, System.EventArgs e)
{
    // Nothing to initialise.
}

        
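// [Web Form Designer generated code: collapsed #region omitted in the original post.
//  Because the page sets AutoEventWireup="false", the handlers in this class must be wired
//  up explicitly; presumably that omitted code (OnInit / InitializeComponent) does so.]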

        
/// <summary>
/// Click handler for <c>WebClientButton</c>: downloads the resource at the URL entered in
/// <c>UrlText</c> using <see cref="WebClient"/> and displays the text in <c>ContentHtml</c>.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void WebClientButton_Click(object sender, System.EventArgs e)
{
    PageUrl = UrlText.Text;

    // NOTE(review): the URL comes straight from the form and is fetched with the server
    // process's default credentials. Restrict the allowed schemes/hosts before exposing
    // this page to untrusted users.

    // "using" guarantees the client is disposed even when the download throws;
    // the original only disposed it on the success path.
    using (WebClient wc = new WebClient())
    {
        wc.Credentials = CredentialCache.DefaultCredentials;

        // Approach 1: download the raw bytes and decode them.
        // Encoding.Default is the server's ANSI code page; the charset declared by the
        // remote page is not consulted.
        byte[] pageData = wc.DownloadData(PageUrl);
        ContentHtml.Text = Encoding.Default.GetString(pageData);

        // Approach 2 (equivalent alternative): open the response with wc.OpenRead(PageUrl),
        // wrap it in a StreamReader using Encoding.Default, and assign ReadToEnd() to
        // ContentHtml.Text, closing the stream afterwards.
    }
}


        
/// <summary>
/// Click handler for <c>WebRequestButton</c>: fetches the URL entered in <c>UrlText</c>
/// using <see cref="WebRequest"/> and displays the response text in <c>ContentHtml</c>.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void WebRequestButton_Click(object sender, System.EventArgs e)
{
    PageUrl = UrlText.Text;

    // NOTE(review): the URL is user-supplied; restrict allowed schemes/hosts before
    // exposing this page to untrusted users.
    WebRequest request = WebRequest.Create(PageUrl);

    // The original never closed the WebResponse and released nothing when reading threw.
    // Nested "using" blocks dispose reader, stream and response in every case.
    using (WebResponse response = request.GetResponse())
    using (Stream resStream = response.GetResponseStream())
    using (StreamReader sr = new StreamReader(resStream, System.Text.Encoding.Default))
    {
        // Encoding.Default is the server's ANSI code page; the response charset is not consulted.
        ContentHtml.Text = sr.ReadToEnd();
    }
}


        
/// <summary>
/// Click handler that fetches the URL entered in <c>UrlText</c>, strips markup tags from
/// the response, collapses whitespace runs to single spaces, and shows the resulting
/// plain text in <c>ContentHtml</c>.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void GetText_Click(object sender, System.EventArgs e)
{
    PageUrl = UrlText.Text;

    // NOTE(review): the URL is user-supplied; restrict allowed schemes/hosts before
    // exposing this page to untrusted users.
    WebRequest request = WebRequest.Create(PageUrl);

    string html;
    // Dispose reader, stream and response even if reading throws; the original
    // never closed the WebResponse.
    using (WebResponse response = request.GetResponse())
    using (Stream resStream = response.GetResponseStream())
    using (StreamReader sr = new StreamReader(resStream, System.Text.Encoding.Default))
    {
        html = sr.ReadToEnd();
    }

    // Remove anything that looks like a tag. The original text had lost the comma
    // between the pattern and the replacement argument, which does not compile.
    string text = Regex.Replace(html, "<[^>]*>", "");

    // Collapse each run of whitespace into a single space (same missing-comma fix).
    ContentHtml.Text = Regex.Replace(text, "\\s+", " ");
}

    }

}

http://www.chenjiliang.com/Article/View.aspx?ArticleID=1193&TypeID=5
posted on 2007-01-29 11:12  mbskys  阅读(110)  评论(0)    收藏  举报