C#下载网页源码的方法

 

 

 

 

using System; using System.Collections.Generic; using System.Linq; using System.Text; using System.Data; using System.Net; using System.IO; using System.Text.RegularExpressions;

namespace ConsoleApplication1
{
    /// <summary>
    /// Console sample that downloads a remote file and stores it on the D: drive.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Downloads the hard-coded URL once and writes the complete response to
        /// <c>D:\</c>, using the URL's last path segment as the file name.
        /// Any failure is reported on the console instead of crashing the process.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            string URLAddress = "https://files.cnblogs.com/scy251147/EDaemonSolution.zip";

            // Everything after the last '/' is the file name, extension included,
            // so no extra ".zip" must be appended afterwards.
            string fileName = URLAddress.Substring(URLAddress.LastIndexOf('/') + 1);

            // The root is written with a trailing separator: Path.Combine("D:", name)
            // would yield the drive-relative path "D:name".
            string targetPath = Path.Combine(@"D:\", fileName);

            try
            {
                // The using blocks guarantee that the client and both streams are
                // released even when the transfer fails half-way.
                using (WebClient client = new WebClient())
                using (Stream source = client.OpenRead(URLAddress))
                // FileMode.Create truncates an existing file so no stale bytes remain.
                using (FileStream target = new FileStream(targetPath, FileMode.Create, FileAccess.Write))
                {
                    // Stream the whole response; a fixed-size buffer would cut off
                    // anything larger than the buffer.
                    source.CopyTo(target);
                }
            }
            catch (Exception ex)
            {
                Console.WriteLine(ex.ToString());
            }
        }
    }
}

 

 

 

 

 

 

C#下载网页源码的方法,用到msxml2

using System.Text.RegularExpressions;

using MSXML2;

 

/// <summary>
/// Downloads the page at <paramref name="link"/> through MSXML2.XMLHTTP, decodes
/// it as GB2312 and returns the markup from the first <c>&lt;body</c> tag onward
/// with script blocks and line feeds removed and backslashes turned into slashes.
/// </summary>
/// <param name="link">Address of the page to fetch with an HTTP GET.</param>
/// <returns>
/// The cleaned markup; the whole cleaned document when no <c>&lt;body</c> tag is
/// found; an empty string when the response carries no body.
/// </returns>
private string gethtm(string link)
{
    MSXML2.XMLHTTP xmlhttp = new MSXML2.XMLHTTP();

    // Third argument is false; per the MSXML documentation this makes the request
    // synchronous, so send() returns only after the response has arrived and no
    // artificial delays are needed between the calls.
    xmlhttp.open("GET", link, false, null, null);
    xmlhttp.send("");

    byte[] body = (byte[])xmlhttp.responseBody;
    if (body == null || body.Length == 0)
    {
        // Nothing to decode; avoids an ArgumentNullException in GetString.
        return string.Empty;
    }

    // NOTE(review): the encoding is hard-coded; pages served in another charset
    // will decode incorrectly - confirm that all targets are GB2312.
    string html = Encoding.GetEncoding("GB2312").GetString(body).Trim();

    // Keep everything from the first "<body" to the end of the document.
    // When the tag is missing the text is kept as-is instead of throwing.
    Match bodyMatch = Regex.Match(html, @"<\s*body(.|\n)*", RegexOptions.IgnoreCase);
    if (bodyMatch.Success)
    {
        html = bodyMatch.Value;
    }

    // Strip script blocks and every line feed.
    // NOTE(review): "[.|\n]" is a character class matching a literal '.', '|' or
    // line feed, presumably meant as "any whitespace"; pattern left unchanged to
    // preserve existing output - confirm intent.
    html = Regex.Replace(
        html,
        @"(<[.|\n]*?script(.|\n)*?/[.|\n]*?script[\n]*>)|(\n)",
        "",
        RegexOptions.IgnoreCase);

    return html.Replace("\\", "/");
}

posted @ 2013-05-16 20:53  晴天有时下鱼  阅读(363)  评论(0)  编辑  收藏  举报