// 一个简易文章抓取工具(C#) (转载)



/*************************************
 * CopyRight (c) edzh.com
 * Date --> 2006-3-22
 * Coder --> yesun
 *************************************/

using System;
using System.Drawing;
using System.Collections;
using System.ComponentModel;
using System.Windows.Forms;
using System.Data;
using System.IO;
using System.Net;
using System.Text.RegularExpressions;
using System.Web;
using System.Threading;
using System.Xml;
namespace GetArticle
{
  /// <summary>
 /// 夜隼信息采集器 v2.0 - 针对edzh.com开发
 /// </summary>
 public class Form1 : System.Windows.Forms.Form
 {

  // Worker thread used for a single-page capture; created and started in btnStrat_Click.
  public Thread t;

  // Not referenced anywhere in the visible part of this file.
  DataTable listdt = new DataTable();
  // Number of articles captured in the current run; zeroed when a run starts and
  // incremented by GetArticle.strat for every article it stores.
  public int j = 0;
  // Scheduler state for multi-page captures.
  // NOTE(review): the original comment said "only 5 threads may run at once", but the
  // default limit below is 4 — confirm which value is intended.
  public static int maxThreadCount = 4;
  // Number of queued worker threads currently counted as running.
  public static int currentThreadCount = 0;
  // Index into `thread` of the next worker that timer_CheckThread will start.
  public static int intCurrentThread = 0;
  // Set to false by timer_CheckThread and btnReset_Click; never read in the visible code.
  public bool istrue = true;
  // Queue of worker threads built by btnStrat_Click for a multi-page capture.
  public Thread[] thread;


  // Name of the text encoding used to decode downloaded pages ("gb2312" or "utf-8").
  public static string encoding = "gb2312";


  // Scheduler timer: first fires after 20 s; timer_CheckThread then re-arms it at 3 s.
  private System.Timers.Timer runable_Timer = new System.Timers.Timer(20*1000);
  /// <summary>
  /// Required designer variable.
  /// </summary>
  private System.ComponentModel.Container components = null;

  public Form1()
  {
   //
   // Windows 窗体设计器支持所必需的
   //
   InitializeComponent();
   BindLink();
   BindEncode();
   //
   // TODO: 在 InitializeComponent 调用后添加任何构造函数代码
   //
  }
  /// <summary>
  /// 应用程序的主入口点。
  /// </summary>
  [STAThread]
  static void Main()
  {
   Application.Run(new Form1());
  }

  // Initially non-signalled event; not referenced anywhere in the visible part of this file.
  static ManualResetEvent ev = new ManualResetEvent(false);


  /// <summary>
  /// 开始抓取
  /// </summary>
  /// <param name="sender"></param>
  /// <param name="e"></param>
  private void btnStrat_Click(object sender, System.EventArgs e)
  {
   //测试抓取文章
   string url   = this.url.Text.Trim();
   string reg   = this.reg.Text.Trim();
   string folderpath = this.floder.Text.Trim();
   string startTag  = this.startTag.Text.Trim();
   string endTag  = this.endTag.Text.Trim();
   string adstartTag = this.adStartTag.Text.Trim();
   string adendTag  = this.adEndTag.Text.Trim();

   encoding = this.coder.SelectedItem.ToString().ToLower();
   

   //归零
   j = 0;
   this.comboBoxListURL.Enabled = false;
   try
   {
    maxThreadCount = Int32.Parse(this.textBoxMaxThread.Text);
   }
   catch{}


   //ListView Clear
   this.lvResult.Clear();
   this.lvResult.FullRowSelect = true;
   this.lvResult.View = View.LargeIcon;
   this.lvResult.View = View.Details;
   this.lvResult.Columns.Add("编号",80, HorizontalAlignment.Center);
   this.lvResult.Columns.Add("标题",300,HorizontalAlignment.Left);
   this.lvResult.Columns.Add("状态",50, HorizontalAlignment.Left);
   this.lvResult.Columns.Add("大小",65,HorizontalAlignment.Left);
   this.lvResult.Columns.Add("耗时",75,HorizontalAlignment.Left);

   ImageList imgList = new ImageList();
   try
   {
    Image largeImg = Image.FromFile(Application.StartupPath+"file://largeImg.gif/");
    imgList.Images.Add(largeImg);
   }
   catch{}
   this.listView1.Columns.Clear();
   this.listView1.Items.Clear();
   this.listView1.LargeImageList = imgList;
   this.listView1.Scrollable = true;
   //this.listView1.SmallImageList = this.imglist_fild;
   this.listView1.View = View.LargeIcon;
   //this.listView1.View = View.Details;
   this.listView1.GridLines = true;
   this.listView1.FullRowSelect = true;
   this.listView1.Columns.Add("名称", 60, HorizontalAlignment.Left);


   if(this.chkBoxIsMutiPage.Checked)
   {
    //多页搜索
    if(txtUrl.Text.Trim()!="")
    {
     int startpage = 0;
     int endpage = 0;
     int leijia = 1;
     try
     {
      startpage = Convert.ToInt32(this.txtstartpage.Text);
      endpage = Convert.ToInt32(this.txtendpage.Text);
      leijia = Convert.ToInt32(this.txtleijia.Text.Trim());

     }
     catch
     {
     }
     //调用抓取接口
     //for(int i=startpage;i<=endpage;i++)
     thread = new Thread[endpage - startpage + 1];

     this.btnStrat.Enabled = false;

     int tempInt = 0;
     this.statusBar1.Text = "正在初始化线程...";
     for(int i=startpage;i<=endpage;i=i+leijia)
     {
      GetArticle ga = new GetArticle();
      //传入相关参数
      ga.url = txtUrl.Text.Replace("@pageid",i.ToString());
      ga.reg = reg;
      ga.pageReg = this.txtPagePatt.Text;
      ga.folderpath = folderpath;
      ga.startTag = startTag;
      ga.endTag = endTag;
      ga.adStartTag = adstartTag;
      ga.adEndTag = adendTag;
      ga.parentForm = this;


      Thread th = new Thread(new ThreadStart(ga.strat));
      thread[tempInt++] = th;
     }     
     this.statusBar1.Text = "共"+tempInt+"个线程保存队列中,正在启动线程,请稍候...";
     

     runable_Timer.Elapsed += new System.Timers.ElapsedEventHandler(timer_CheckThread);
     runable_Timer.Start();
    }
    else
    {
     MessageBox.Show("请输入通用网址");
    }

   }
   else
   {
    if(url!="")
    {
     //调用抓取接口
     GetArticle ga = new GetArticle();
     //传入相关参数
     ga.url = url;
     ga.reg = reg;
     ga.pageReg = this.txtPagePatt.Text;
     ga.folderpath = folderpath;
     ga.startTag = startTag;
     ga.endTag = endTag;
     ga.adStartTag = adstartTag;
     ga.adEndTag = adendTag;
     ga.parentForm = this;


     this.btnStrat.Enabled = false;
     this.statusBar1.Text = "正在准备抓取数据,请稍候...";

 

     ThreadStart ts = new ThreadStart(ga.strat);
     t = new Thread(ts);
     t.Name = "线程#1";

     ListViewItem item = new ListViewItem(t.Name, 0);
     item.SubItems.Add(t.Name);
     item.Tag = t.GetHashCode();
     item.Text = t.Name;
     item.ForeColor = Color.Red;
     item.EnsureVisible();
     this.listView1.Items.AddRange(new ListViewItem[] { item });

     t.Priority = ThreadPriority.Lowest;
     t.Start();
    }
    else
    {
     MessageBox.Show("请输入网址");
    }
   }

  }


  /// <summary>
  /// 定时检查线程数
  /// </summary>
  /// <param name="sender"></param>
  /// <param name="e"></param>
  void timer_CheckThread(object sender, System.Timers.ElapsedEventArgs e)
  {
   System.Timers.Timer initTimer = (System.Timers.Timer)sender;
   initTimer.Stop();


   //判断当前线程数,如果不足5个,则Join新的线程
   if(currentThreadCount < maxThreadCount)
   {
    //表示当前线程可以加入线程,使得总执行线程为5个
    try
    {
     if(thread[intCurrentThread] != null && thread[intCurrentThread].ThreadState == ThreadState.Unstarted)
     {
      thread[intCurrentThread].Priority = ThreadPriority.Lowest;
      currentThreadCount++;
      thread[intCurrentThread].Name = "线程#"+(intCurrentThread+1);

      this.statusBar1.Text = thread[intCurrentThread].Name+"已开始执行...";

      thread[intCurrentThread].Start();
      
      ListViewItem item = new ListViewItem("线程#"+(intCurrentThread+1), 0);
      //Item.Tag 和 Thread.GetHashCode关联
      item.Tag = thread[intCurrentThread].GetHashCode();
      item.SubItems.Add("线程#"+(intCurrentThread+1));
      item.Text = "线程#"+(intCurrentThread+1);
      item.ForeColor = Color.Red;
      item.EnsureVisible();
      this.listView1.Items.AddRange(new ListViewItem[] { item });

      intCurrentThread++;

       
     }
     else
     {
      istrue = false;
     }
    }
    catch
    {
     istrue = false;
    }
   }

   initTimer.Interval = 3 * 1000;
   initTimer.Start();
  }


 
  /// <summary>
  /// 设置编码
  /// </summary>
  void BindEncode()
  {
   //clear list

   for(int i=this.coder.Items.Count-1;i>=0;i--)
   {
    this.coder.Items.RemoveAt(i);
   }

   this.coder.Items.Add((object)"gb2312");
   this.coder.Items.Add((object)"utf-8");
   this.coder.SelectedIndex = 0;
  }

  /// <summary>
  /// 绑定一些默认的网站
  /// </summary>
  void BindLink()
  {

   //ComBox list


   for(int i=this.comboBoxListURL.Items.Count-1;i>=0;i--)
   {
    this.comboBoxListURL.Items.RemoveAt(i);
   }

   //绑定默认数据
   try
   {
    MyItem myitem;
    XmlDataDocument xmlDoc = new XmlDataDocument();
    xmlDoc.Load(Application.StartupPath+"/URL.xml");
    XmlNodeList xmlNodes = xmlDoc.SelectNodes("//root/url");
    for(int i = 0;i<xmlNodes.Count;i++)
    {
     XmlElement xmlElem = (XmlElement)xmlNodes[i];
     myitem = new MyItem();
     myitem.id = Convert.ToInt32(xmlElem.GetAttribute("id"));
     myitem.name = Convert.ToString(xmlElem.GetAttribute("name"));
     myitem.regex = b2a(xmlElem.GetAttribute("regex"));
     myitem.url = b2a(xmlElem.GetAttribute("url"));
     myitem.starttag = b2a(xmlElem.GetAttribute("starttag"));
     myitem.endtag = b2a(xmlElem.GetAttribute("endtag"));
     myitem.adstarttag = b2a(xmlElem.GetAttribute("adstarttag"));
     myitem.adendtag = b2a(xmlElem.GetAttribute("adendtag")); 
     try
     {
      myitem.pageReg = b2a(xmlElem.GetAttribute("pageReg")); 
     }
     catch{}
     //add to list
     this.comboBoxListURL.Items.Add(myitem);
    }  
    this.comboBoxListURL.SelectedIndex = 0;
   }
   catch
   {
   }
  }


  /// <summary>
  /// 转换一些特殊字符
  /// </summary>
  /// <param name="str"></param>
  /// <returns></returns>
  string a2b(string str)
  {
   str = str.Replace("<","<");
   str = str.Replace(">",">");
   return str;
  }

  /// <summary>
  /// 转换一些特殊字符
  /// </summary>
  /// <param name="str"></param>
  /// <returns></returns>
  string b2a(string str)
  {   
   str = str.Replace("<","<");
   str = str.Replace(">",">");
   return str;
  }


  /// <summary>
  /// 终止搜索
  /// </summary>
  /// <param name="sender"></param>
  /// <param name="e"></param>
  private void btnReset_Click(object sender, System.EventArgs e)
  {
   //中止线程
   try
   {
    istrue = false;
    if(this.chkBoxIsMutiPage.Checked)
    {
     //Application.Exit();
     //终止所有运行中的线程
     for(int i=0;i<10;i++)
     {
      thread[i].Abort();
      this.statusBar1.Text = "线程"+thread[i].Name+"已终止!";
     }     
     //归零
     j = 0;
     currentThreadCount = 0;
     intCurrentThread = 0;

    }
    else
    {
     t.Abort();
    }
    this.statusBar1.Text = "已终止搜索!";
    this.comboBoxListURL.Enabled = true;
    this.btnStrat.Enabled = true;
    this.btnReset.Enabled = true;
   }
   catch
   {
   }
  }


  /// <summary>
  /// 保存网站
  /// </summary>
  /// <param name="sender"></param>
  /// <param name="e"></param>
  private void btnSave_Click(object sender, System.EventArgs e)
  {
   try
   {
    XmlDataDocument xmlDoc = new XmlDataDocument();
    XmlElement xmlElem;
    if(File.Exists(Application.StartupPath+"/URL.xml"))
    {
     xmlDoc.Load(Application.StartupPath+"/URL.xml");
    }
    else
    {
     xmlDoc.LoadXml("<root />");
    }
    xmlElem = xmlDoc.CreateElement("url");
    xmlElem.SetAttribute("id" ,xmlDoc.SelectNodes("/root/url").Count.ToString());
    xmlElem.SetAttribute("name" ,a2b(this.name.Text));
    xmlElem.SetAttribute("url" ,a2b(this.url.Text));
    xmlElem.SetAttribute("regex", a2b(this.reg.Text));
    xmlElem.SetAttribute("starttag", a2b(this.startTag.Text));
    xmlElem.SetAttribute("endtag", a2b(this.endTag.Text));
    xmlElem.SetAttribute("adstarttag",a2b(this.adStartTag.Text));
    xmlElem.SetAttribute("adendtag", a2b(this.adEndTag.Text));
    xmlElem.SetAttribute("pageReg", a2b(this.txtPagePatt.Text));
    xmlDoc.DocumentElement.AppendChild(xmlElem);
    xmlDoc.Save(Application.StartupPath+"/URL.xml");
    MessageBox.Show("操作成功!");
    //refresh list
    this.BindLink();
    this.comboBoxListURL.SelectedIndex = this.comboBoxListURL.Items.Count-1;
   }
   catch(Exception ex)
   {
    MessageBox.Show(ex.ToString());
   }
  
  }


  /// <summary>
  /// 新建网站
  /// </summary>
  /// <param name="sender"></param>
  /// <param name="e"></param>
  private void menuItem2_Click(object sender, System.EventArgs e)
  {
   this.name.Text = "";
   this.url.Text = "";
   this.reg.Text = "";
   this.startTag.Text = "";
   this.endTag.Text = "";
   this.adStartTag.Text = "";
   this.adEndTag.Text = "";
   this.txtPagePatt.Text = "";
   this.txtReplace.Text = "";
  }

  /// <summary>
  /// 退出程序
  /// </summary>
  /// <param name="sender"></param>
  /// <param name="e"></param>
  private void menuItem3_Click(object sender, System.EventArgs e)
  {
   Application.Exit();
  }

  /// <summary>
  /// 是否多页抓取
  /// </summary>
  /// <param name="sender"></param>
  /// <param name="e"></param>
  private void chkBoxIsMutiPage_CheckedChanged(object sender, System.EventArgs e)
  {
   if(this.txtUrl.Enabled)
   {
    this.txtUrl.Enabled = false;
   }
   else
   {
    this.txtUrl.Enabled = true;
   }
   if(this.txtstartpage.Enabled)
   {
    this.txtstartpage.Enabled = false;
   }
   else
   {
    this.txtstartpage.Enabled = true;
   }
   if(this.txtendpage.Enabled)
   {
    this.txtendpage.Enabled = false;
   }
   else
   {
    this.txtendpage.Enabled = true;
   }
   if(this.txtleijia.Enabled)
   {
    this.txtleijia.Enabled = false;
   }
   else
   {
    this.txtleijia.Enabled = true;
   }
   if(this.textBoxMaxThread.Enabled)
   {
    this.textBoxMaxThread.Enabled = false;
   }
   else
   {
    this.textBoxMaxThread.Enabled = true;
   }
   //分析URL
   string strUrl= this.url.Text.Trim();
   try
   {
    if(strUrl.ToLower().IndexOf("page=") > -1)
    {
     strUrl = strUrl.Substring(0,strUrl.IndexOf("page=")+5) + "@pageid" + strUrl.Substring(strUrl.IndexOf("&",strUrl.IndexOf("page=")+1));
    }
    if(strUrl.ToLower().IndexOf("pageid=") > -1)
    {
     strUrl = strUrl.Substring(0,strUrl.IndexOf("pageid=")+7) + "@pageid" + strUrl.Substring(strUrl.IndexOf("&",strUrl.IndexOf("pageid=")+1));
    }
    if(strUrl.ToLower() == this.url.Text.Trim().ToLower())
    {
     //表示没有处理
     if(strUrl.IndexOf("_") > -1)
     {
      strUrl = strUrl.Substring(0,strUrl.LastIndexOf("_")+1)+"@pageid"+strUrl.Substring(strUrl.LastIndexOf("."));
     }
    }
    if(strUrl.ToLower() == this.url.Text.Trim().ToLower())
    {
     //表示没有处理
     if(strUrl.ToLower().IndexOf("index") > -1)
     {
      strUrl = strUrl.Substring(0,strUrl.LastIndexOf("index")+5)+"@pageid"+strUrl.Substring(strUrl.LastIndexOf("."));
     }
    }
   }
   catch
   {
    this.txtUrl.Text = this.url.Text;
   }
   this.txtUrl.Text = strUrl;
  }
  
  

  /// <summary>
  /// 选择网站
  /// </summary>
  /// <param name="sender"></param>
  /// <param name="e"></param>
  private void comboBoxListURL_SelectedIndexChanged(object sender, System.EventArgs e)
  {
   try
   {
    MyItem myitem = (MyItem)comboBoxListURL.SelectedItem;
    this.url.Text = myitem.url;
    this.name.Text = myitem.name;
    this.reg.Text = myitem.regex;
    this.startTag.Text = myitem.starttag;
    this.endTag.Text = myitem.endtag;
    this.adStartTag.Text = myitem.adstarttag;
    this.adEndTag.Text = myitem.adendtag;
    this.txtPagePatt.Text = myitem.pageReg;
   }
   catch{}
  }


  /// <summary>
  /// 保存
  /// </summary>
  /// <param name="sender"></param>
  /// <param name="e"></param>
  private void btnDel_Click(object sender, System.EventArgs e)
  {
   try
   {
    MyItem myitem = (MyItem)comboBoxListURL.SelectedItem;

    if(myitem != null)
    {
     XmlDataDocument xmlDoc = new XmlDataDocument();
     if(File.Exists(Application.StartupPath+"/URL.xml"))
     {
      xmlDoc.Load(Application.StartupPath+"/URL.xml");
     }
     else
     {
      xmlDoc.LoadXml("<root />");
     }

     XmlNode node = xmlDoc.SelectSingleNode("/root/url[@id="+myitem.id+"]");
     xmlDoc.SelectSingleNode("/root").RemoveChild(node);
     xmlDoc.Save(Application.StartupPath+"/URL.xml");
     MessageBox.Show("操作成功!");
    }

    BindLink();

   }
   catch(Exception ex)
   {
    MessageBox.Show(ex.Message);
   }
  }

 }


 /// <summary>
 /// 抓取文章类
 /// </summary>
 public class GetArticle
 {
  #region "属 性"

  // Form that owns this job; strat and FilterContent read settings from it and
  // report progress through its controls.
  public Form1 parentForm = null;

  private string _url = "";
  /// <summary>Address of the list page that strat downloads and scans.</summary>
  public string url
  {
   get{return _url;}
   set{this._url=value;}
  }
  private string _reg = "";
  /// <summary>
  /// Regular expression applied to the list page; strat reads its named groups
  /// topic, url, vdatetime and catalogname. An empty value is replaced by ".*".
  /// </summary>
  public string reg
  {
   get{return _reg;}
   set{this._reg=value;}
  }
  private string _reg1 = "";
  /// <summary>Not used anywhere in the visible part of this file.</summary>
  public string reg1
  {
   get{return _reg1;}
   set{this._reg1=value;}
  }

  private string _pageReg = "";
  /// <summary>
  /// Regular expression applied to an article page to find links to further pages;
  /// strat reads its named groups url and page. Empty disables the paging step.
  /// </summary>
  public string pageReg
  {
   get{return _pageReg;}
   set{this._pageReg=value;}
  }

  private string _folderpath = "";
  /// <summary>Folder the result XML is saved to; strat supplies a default when empty.</summary>
  public string folderpath
  {
   get{return _folderpath;}
   set{this._folderpath=value;}
  }
  private string _startTag = "";
  /// <summary>Text marking where the article body starts; used by FilterContent.</summary>
  public string startTag
  {
   get{return _startTag;}
   set{this._startTag=value;}
  }
  private string _endTag = "";
  /// <summary>Text marking where the article body ends; used by FilterContent.</summary>
  public string endTag
  {
   get{return _endTag;}
   set{this._endTag=value;}
  }

  private string _adStartTag = "";
  /// <summary>Text marking the start of a section FilterContent cuts out of the body.</summary>
  public string adStartTag
  {
   get{return _adStartTag;}
   set{this._adStartTag=value;}
  }
  private string _adEndTag = "";
  /// <summary>Text marking the end of a section FilterContent cuts out of the body.</summary>
  public string adEndTag
  {
   get{return _adEndTag;}
   set{this._adEndTag=value;}
  }

  #endregion


  /// <summary>
  /// 开始抓取数据
  /// </summary>
  /// <param name="url">网址</param>
  /// <param name="reg">正则表达式</param>
  /// <returns></returns>
  public void strat()
  {
   if(this.folderpath=="")
   {
    folderpath = "C:\\Documents and Settings\\"+Environment.UserName+"\\桌面\\";
   }
   if(!Directory.Exists(folderpath))
   {
    Directory.CreateDirectory(folderpath);
   }
   if(url=="")
   {
    return;
   }
   if(reg=="")
   {
    reg = ".*";
   }
   string content = "";
   Regex regex;
   Match mc;
   content = getWebContent(url);content = content.Replace("\"","'"); 

   //开始用正则表达式取出图片文件名
   regex = new Regex(reg,RegexOptions.Compiled | RegexOptions.IgnoreCase);
   
            XmlDataDocument xmlDoc = new XmlDataDocument();
   xmlDoc.LoadXml("<root />");
   XmlElement xmlElem = xmlDoc.CreateElement("ArticleList");

   
   
   int successCount = 1;
   int i= 1;
   for (mc = regex.Match(content),i=1; mc.Success; mc = mc.NextMatch(),i++)
   {
    DateTime startTime = DateTime.Now;
    //if(i>10)break;
    //outHtml += "标题:" + mc.Groups["topic"].Value + " 链接:" + mc.Groups["url"].Value+"\n";
    //抓取出来了数据,然后根据地址把内容取出来
    string tempstr  = "";
    string topic  = "";
    string href   = "";
    string vdatetime = "";
    string catalogname = "";
    try
    {
     
     //tempstr = content.Replace("\"","'");
     //处理tempstr,取其内容
     topic = mc.Groups["topic"].Value.Trim();
     href = mc.Groups["url"].Value;
     vdatetime = mc.Groups["vdatetime"].Value;
     catalogname = mc.Groups["catalogname"].Value;


     string RealURL = "";
     string url1 = this.url;

     if (href.IndexOf("http") <= -1)
     {
      if (href.StartsWith("/"))
      {
       //url1 = url1.Substring(0, url1.IndexOf('/', 0, 2));
       Regex r = new Regex(@"^http://(?<d>[^/]+)/", RegexOptions.Compiled);
       RealURL = "http://"+ r.Match(url).Result("${d}") + href;
      }
      else
      {
       url1 = url1.Substring(0, url1.LastIndexOf('/'));
       RealURL = url1 + "/" + href;
      }
     }
     else
     {
      RealURL = href;
     }
     
     //this.parentForm.statusBar1.ForeColor = Color.Black;
     //this.parentForm.statusBar1.Text = "正在下载"+RealURL;
     tempstr = getWebContent(RealURL);
     //备用

     //过滤内容页
     string contentString  = tempstr;
     tempstr = FilterContent(tempstr);

     //是否内容页有分页
     if(this.pageReg != "")
     {
      try
      {
       Match mcContent;
       Regex regexContent = new Regex(this.pageReg);
       int intPage = 0;
       for (mcContent = regexContent.Match(contentString),intPage=1; mcContent.Success; mcContent = mcContent.NextMatch(),intPage++)
       {
        string pageUrl = mcContent.Groups["url"].Value;
        int page = 1;
        if(mcContent.Groups["page"].Value != "")
        {
         try
         {
          page = Convert.ToInt32(mcContent.Groups["page"].Value);
         }
         catch{}
        }

        string pageRealURL = "";
        string pageUrl1 = RealURL;

        if (pageUrl.IndexOf("http") <= -1)
        {
         if (pageUrl.StartsWith("/"))
         {
          Regex r = new Regex(@"^http://(?<d>[^/]+)/", RegexOptions.Compiled);
          pageRealURL = "http://"+ r.Match(pageUrl1).Result("${d}") + pageUrl;
         }
         else
         {
          pageUrl1 = pageUrl1.Substring(0, pageUrl1.LastIndexOf('/'));
          pageRealURL = pageUrl1 + "/" + pageUrl;
         }
        }
        else
        {
         pageRealURL = pageUrl;
        }
        if(page > 1)
        {
         //从第二页开始
         //开始抓去内容分页的下一页内容
         string nextPageContent = getWebContent(pageRealURL);
         //开始过滤
         nextPageContent = FilterContent(nextPageContent);
         tempstr += "<br/>"+nextPageContent;
         tempstr = Regex.Replace(tempstr,this.pageReg,"");
        }
       }
      }
      catch{}
     }


     if(tempstr!="" && topic != "")
     {
      //开始加入xml
      XmlElement xmlElemArt = xmlDoc.CreateElement("Article");
      xmlElemArt.SetAttribute("topic",topic);
      xmlElemArt.SetAttribute("href",href);
      xmlElemArt.SetAttribute("comefrom",this.parentForm.name.Text);
      xmlElemArt.SetAttribute("vdatetime",vdatetime);
      xmlElemArt.SetAttribute("catalogname",catalogname);
      xmlElemArt.InnerText = tempstr;
      xmlElem.AppendChild(xmlElemArt);
      
      this.parentForm.j++;

      //Add to listView
      this.parentForm.statusBar1.ForeColor=Color.Black;
      this.parentForm.statusBar1.Text = "成功抓取"+this.parentForm.j+"篇";

      ListViewItem item1 = new ListViewItem(Thread.CurrentThread.Name+"->"+i.ToString(),0);
      item1.SubItems.Add(topic);
      item1.SubItems.Add("True");
      item1.SubItems.Add(tempstr.Length +"byte");
      DateTime endTime = DateTime.Now;
      TimeSpan ts = endTime - startTime;      
      item1.SubItems.Add(ts.TotalSeconds +"秒");
      

      this.parentForm.lvResult.Items.AddRange(new ListViewItem[] { item1 });
      this.parentForm.lvResult.Items[this.parentForm.lvResult.Items.Count-1].EnsureVisible();
      this.parentForm.lvResult.TopItem.Selected = true;

      

      //更改ListView中的名字,
      for(int j = 0;j<this.parentForm.listView1.Items.Count;j++)
      {
       if((int)this.parentForm.listView1.Items[j].Tag == Thread.CurrentThread.GetHashCode())
       {
        this.parentForm.listView1.Items[j].Text = Thread.CurrentThread.Name+"-"+successCount+"";
       }
      }
      successCount++;
      

     }
     else
     {
      throw new Exception("Error");
     }
    }
    catch
    {
     //MessageBox.Show(ex.ToString());
     this.parentForm.statusBar1.ForeColor=Color.Red;
     this.parentForm.statusBar1.Text = "抓取失败  ====> "+topic;
     ListViewItem item1 = new ListViewItem(Thread.CurrentThread.Name+"->"+i.ToString(),0);
     item1.SubItems.Add(topic);
     item1.SubItems.Add("False");
     item1.SubItems.Add("");
     item1.SubItems.Add("");
     this.parentForm.lvResult.Items.AddRange(new ListViewItem[] { item1 });
     this.parentForm.lvResult.Items[this.parentForm.lvResult.Items.Count-1].EnsureVisible();
     this.parentForm.statusBar1.ForeColor = Color.Red;
    }
    
   }
   xmlDoc.DocumentElement.AppendChild(xmlElem);
   string filename = "";
   
   filename = this.parentForm.name.Text+"_"+DateTime.Now.ToShortDateString();
   int filenameNum = 1;
   while(File.Exists(folderpath+"file://%22+filename+%22_%22+filenameNum+%22.xml/"))
   {
    filenameNum++;
   }
   filename = filename+"_"+filenameNum+".xml";

   try
   {
    xmlDoc.Save(folderpath+"file://%22+filename/);
   }
   catch
   {
    this.parentForm.statusBar1.Text = "保存Xml失败 ==> "+folderpath+"file://%22+filename/;
   }

   this.parentForm.statusBar1.ForeColor = Color.Black;  
   
   //减小当前线程数
   //this.parentForm.currentThreadCount--;
   Form1.currentThreadCount--;
   this.parentForm.statusBar1.Text = Thread.CurrentThread.Name+" 执行完毕!";
   //更改ListView中的名字,
   for(int j = 0;j<this.parentForm.listView1.Items.Count;j++)
   {
    if((int)this.parentForm.listView1.Items[j].Tag == Thread.CurrentThread.GetHashCode())
    {
     this.parentForm.listView1.Items[j].ForeColor = Color.Black;
    }
   }
   this.parentForm.statusBar1.ForeColor = Color.Green;
   this.parentForm.statusBar1.Text = Thread.CurrentThread.Name+" 抓取完毕,共抓取"+this.parentForm.j+"篇,数据已保存在"+folderpath+"file://%22+filename/;
   this.parentForm.btnStrat.Enabled = true;
   this.parentForm.comboBoxListURL.Enabled = true;

  }


  /// <summary>
  /// 过滤内容
  /// </summary>
  /// <param name="tempstr"></param>
  /// <returns></returns>
  public string FilterContent(string tempstr)
  {
   string url1 = this.url;
   try
   {
    //开始对内容页分析

    tempstr = tempstr.Substring(tempstr.IndexOf(this.startTag)+this.startTag.Length);      
    tempstr = tempstr.Substring(0,tempstr.IndexOf(this.endTag));

    if(this.adStartTag != "" && this.adEndTag !="")
    {
     //clear ad
     try
     {
      tempstr = tempstr.Substring(0,tempstr.IndexOf(this.adStartTag)) + tempstr.Substring(tempstr.IndexOf(this.adEndTag,tempstr.IndexOf(this.adStartTag))+this.adEndTag.Length);
      tempstr = tempstr.Substring(0,tempstr.IndexOf(this.adStartTag)) + tempstr.Substring(tempstr.IndexOf(this.adEndTag,tempstr.IndexOf(this.adStartTag))+this.adEndTag.Length);
      tempstr = tempstr.Substring(0,tempstr.IndexOf(this.adStartTag)) + tempstr.Substring(tempstr.IndexOf(this.adEndTag,tempstr.IndexOf(this.adStartTag))+this.adEndTag.Length);
     }
     catch{}
    }
    //开始转换tempstr中的一些相对路径的图片 ,带“

    string imagename = System.Text.RegularExpressions.Regex.Match(tempstr,".*src=\"(?<image>[^\\s]+)\".*").Groups["image"].Value;


    if (imagename.IndexOf("http") <= -1)
    {
     if (imagename.StartsWith("/"))
     {
      Regex r1 = new Regex(@"^http://(?<d>[^/]+)/", RegexOptions.Compiled|RegexOptions.IgnoreCase);
      tempstr = System.Text.RegularExpressions.Regex.Replace(tempstr,"src=\"(?<image>[^\\s]+)\"","src=\"http://"+ r1.Match(this.url).Result("${d}")+"${image}\"",RegexOptions.IgnoreCase);
     }
     else
     {
      tempstr = System.Text.RegularExpressions.Regex.Replace(tempstr,"src=\"(?<image>[^\\s]+)\"","src=\""+url1.Substring(0, url1.LastIndexOf('/'))+"/${image}\"",RegexOptions.IgnoreCase);
     }
    }


    //开始转换tempstr中的一些相对路径的图片 ,不带“
    imagename = System.Text.RegularExpressions.Regex.Match(tempstr,".*src=(?<image>[^\\s]+).*").Groups["image"].Value;


    if(imagename != "")
    {
     if (imagename.IndexOf("http") <= -1)
     {
      if (imagename.StartsWith("/"))
      {
       Regex r1 = new Regex(@"^http://(?<d>[^/]+)/", RegexOptions.Compiled|RegexOptions.IgnoreCase);
       tempstr = System.Text.RegularExpressions.Regex.Replace(tempstr,"src=(?<image>[^\\s]+)","src=\"http://"+ r1.Match(this.url).Result("${d}")+"${image}\"",RegexOptions.IgnoreCase);
      }
      else
      {
       tempstr = System.Text.RegularExpressions.Regex.Replace(tempstr,"src=(?<image>[^\\s]+)","src=\""+url1.Substring(0, url1.LastIndexOf('/'))+"/${image}\"",RegexOptions.IgnoreCase);
      }
     }
    }

    //替换掉垃圾内容,简单替换,暂时不支持正则替换
    if(this.parentForm.txtReplace.Text!="")
    {
     tempstr = tempstr.Replace(this.parentForm.txtReplace.Text,"");
    }


    //如果需要下载图片,则下载图片到本地,并且替换内容中图片路径
    if(this.parentForm.IsDownloadImage.Checked)
    {
     
     Match imgMc;
     //找出所有的图片或者文件
     Regex imgReg = new Regex(".*src=\"(?<FileName>[^\\s^>]+)\".*");
     for (imgMc = imgReg.Match(tempstr); imgMc.Success; imgMc = imgMc.NextMatch())
     {
      //开始下载文件 ContentFileName
      DownloadFile(imgMc.Groups["FileName"].Value);
      //开始修改文件中的名字
      //有时间再写
     }


     //找出所有的图片或者文件
     imgReg = new Regex(".*src=(?<FileName>[^\\s^>]+).*");
     for (imgMc = imgReg.Match(tempstr); imgMc.Success; imgMc = imgMc.NextMatch())
     {
      //开始下载文件 ContentFileName
      DownloadFile(imgMc.Groups["FileName"].Value);
      //开始修改文件中的名字
      //有时间再写
     }

    }

    return tempstr;

   }
   catch
   {
    return "";
   }
  }


  /// <summary>
  /// 下载文件
  /// </summary>
  /// <param name="filename"></param>
  private void DownloadFile(string filename)
  {
   if(filename == "")
    return;
   string path = Application.StartupPath+"file://ArticleContentImageFile//";
   if(!Directory.Exists(path))
   {
    Directory.CreateDirectory(path);
   }
   try
   {
   
    HttpWebRequest oRequest = (HttpWebRequest)WebRequest.Create(filename);
    
    HttpWebResponse oResponse  = (HttpWebResponse)oRequest.GetResponse();
    StreamReader sr = new StreamReader(oResponse.GetResponseStream(), System.Text.Encoding.GetEncoding("utf-8"));

    string sResultContents = sr.ReadToEnd();
    oResponse.Close();
    byte[]  bytes = System.Text.Encoding.GetEncoding("utf-8").GetBytes(sResultContents);
    FileStream fs = new FileStream(path+filename.Substring(filename.LastIndexOf("/")), FileMode.OpenOrCreate, FileAccess.Write);
    fs.Write(bytes, 0, bytes.Length);
    fs.Flush();
    fs.Close();
   }
   catch(Exception ex)
   {
    Console.WriteLine(ex.Message);
   }

  }


  /// <summary>
  /// 抓取页面接口 - WebClient
  /// </summary>
  /// <param name="url"></param>
  /// <returns></returns>
  private string getWebContent( string contenturl )
  {
   string str = "";
   contenturl = contenturl.Replace("&amp;","&");
   WebClient client = new WebClient(); 
   client.Headers.Add("Accept","image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, application/x-shockwave-flash, application/vnd.ms-excel, application/vnd.ms-powerpoint, application/msword, */*");
   client.Headers.Add("Accept-Language","zh-cn");
   client.Headers.Add("UA-CPU","x86");
   client.Headers.Add("User-Agent","Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.2; SV1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)");
   try
   {
    byte[] buffer = client.DownloadData( contenturl );
    if(Form1.encoding == "utf-8")
    {
     str = System.Text.Encoding.GetEncoding("utf-8").GetString( buffer, 0, buffer.Length );
    }
    else
    {
     str = System.Text.Encoding.GetEncoding("gb2312").GetString( buffer, 0, buffer.Length );
    }
   }
   catch(Exception ex)
   {
    //MessageBox.Show(ex.Message);
   }

   return str;
  }
 }


 /// <summary>
 /// MyItem Object
 /// </summary>
 public class MyItem : object
 {   
  public int id;
  public string name;
  public string regex;
  public string url;
  public string starttag;
  public string endtag;
  public string adstarttag;
  public string adendtag;
  public string pageReg;
  public override string ToString()
  {
   // TODO:  添加 MyItem.ToString 实现
   return name;
  }
 }
}

/*************************************
 * CopyRight (c) edzh.com
 * Date --> 2006-3-22
 * Coder --> yesun
 *************************************/

using System;
using System.Drawing;
using System.Collections;
using System.ComponentModel;
using System.Windows.Forms;
using System.Data;
using System.IO;
using System.Net;
using System.Text.RegularExpressions;
using System.Web;
using System.Threading;
using System.Xml;
namespace GetArticle
{
    /// <summary>
    /// 夜隼信息采集器 v2.0 - 针对edzh.com开发
    /// </summary>
    public class Form1 : System.Windows.Forms.Form
    {

        public Thread t;

        DataTable listdt = new DataTable();
        private System.Windows.Forms.MainMenu mainMenu1;
        private System.Windows.Forms.MenuItem menuItem1;
        private System.Windows.Forms.MenuItem menuItem2;
        private System.Windows.Forms.MenuItem menuItem3;
        private System.Windows.Forms.Label label11;
        private System.Windows.Forms.Label label12;

        // Counter that btnStrat_Click resets to 0 before each run.
        public int j = 0;
        private System.Windows.Forms.TabControl tabControl1;
        private System.Windows.Forms.TabPage tabPage1;
        public System.Windows.Forms.ListView lvResult;

        // Directory the application was started from.
        public string appPath = Application.StartupPath;


        // Multithreading settings. The original note says only 5 threads may run at the
        // same time, yet the default below is 4.
        // NOTE(review): confirm which limit is intended (depends on how the counter is compared).
        // btnStrat_Click overwrites maxThreadCount with the value of textBoxMaxThread.
        public static int maxThreadCount = 4;
        public static int currentThreadCount = 0;
        public static int intCurrentThread = 0;
        public bool istrue = true;
        public Thread[] thread;


        // Code page name used when decoding downloaded pages; btnStrat_Click replaces it
        // with the lower-cased selection of the "coder" combo box.
        public static string encoding = "gb2312";


        // Timer with a 20-second interval (20 * 1000 ms); per the original note it is
        // used to check the current thread count once per interval.
        private System.Timers.Timer runable_Timer = new System.Timers.Timer(20 * 1000);
        private System.Windows.Forms.TabControl tabControl3;
        private System.Windows.Forms.TabPage tabPage3;
        private System.Windows.Forms.TabControl tabControl2;
        private System.Windows.Forms.TabPage tabPage2;
        public System.Windows.Forms.ListView listView1;
        private System.Windows.Forms.Label label15;
        private System.Windows.Forms.Button btnDel;
        public System.Windows.Forms.ComboBox comboBoxListURL;
        private System.Windows.Forms.Button btnReset;
        public System.Windows.Forms.Button btnStrat;
        private System.Windows.Forms.Button btnSave;
        private System.Windows.Forms.TextBox adEndTag;
        public System.Windows.Forms.CheckBox IsDownloadImage;
        private System.Windows.Forms.Label label17;
        private System.Windows.Forms.Label label16;
        private System.Windows.Forms.Label label14;
        private System.Windows.Forms.Label label13;
        public System.Windows.Forms.TextBox txtPagePatt;
        private System.Windows.Forms.TextBox txtUrl;
        public System.Windows.Forms.TextBox name;
        private System.Windows.Forms.TextBox adStartTag;
        private System.Windows.Forms.TextBox endTag;
        private System.Windows.Forms.TextBox startTag;
        public System.Windows.Forms.TextBox floder;
        private System.Windows.Forms.TextBox reg;
        private System.Windows.Forms.TextBox url;
        private System.Windows.Forms.Label label7;
        private System.Windows.Forms.CheckBox chkBoxIsMutiPage;
        private System.Windows.Forms.Label label10;
        public System.Windows.Forms.ComboBox coder;
        private System.Windows.Forms.Label label9;
        private System.Windows.Forms.Label label8;
        private System.Windows.Forms.Label label6;
        public System.Windows.Forms.TextBox txtReplace;
        private System.Windows.Forms.Label label4;
        private System.Windows.Forms.Label label3;
        private System.Windows.Forms.Label label2;
        private System.Windows.Forms.Label label1;
        private System.Windows.Forms.Label label18;
        private System.Windows.Forms.NumericUpDown txtstartpage;
        private System.Windows.Forms.NumericUpDown txtleijia;
        private System.Windows.Forms.NumericUpDown txtendpage;
        private System.Windows.Forms.NumericUpDown textBoxMaxThread;
        public System.Windows.Forms.StatusBar statusBar1;



        /// <summary>
        /// Required designer variable.
        /// </summary>
        private System.ComponentModel.Container components = null;

        /// <summary>
        /// Creates the main form: builds the designer-defined controls, then runs the
        /// BindLink and BindEncode setup helpers (both defined elsewhere in this class).
        /// </summary>
        public Form1()
        {
            //
            // Required for Windows Forms designer support.
            //
            InitializeComponent();
            BindLink();
            BindEncode();
            //
            // TODO: add any further constructor code after the InitializeComponent call.
            //
        }

        /// <summary>
        /// Releases the resources held by the form.
        /// </summary>
        /// <param name="disposing">
        /// When true, the designer component container is disposed as well (if one exists);
        /// the value is always forwarded to the base implementation.
        /// </param>
        protected override void Dispose(bool disposing)
        {
            // The container is only touched on an explicit dispose and only if it was created.
            bool releaseComponents = disposing && components != null;
            if (releaseComponents)
            {
                components.Dispose();
            }

            base.Dispose(disposing);
        }

        #region Windows 窗体设计器生成的代码
        /// <summary>
        /// Required method for designer support - do not modify the contents of this
        /// method with the code editor.
        /// Instantiates every control of the form, sets its layout properties and
        /// captions, and attaches the event handlers.
        /// </summary>
        private void InitializeComponent()
        {
            this.mainMenu1 = new System.Windows.Forms.MainMenu();
            this.menuItem1 = new System.Windows.Forms.MenuItem();
            this.menuItem2 = new System.Windows.Forms.MenuItem();
            this.menuItem3 = new System.Windows.Forms.MenuItem();
            this.label11 = new System.Windows.Forms.Label();
            this.label12 = new System.Windows.Forms.Label();
            this.tabControl1 = new System.Windows.Forms.TabControl();
            this.tabPage1 = new System.Windows.Forms.TabPage();
            this.lvResult = new System.Windows.Forms.ListView();
            this.tabControl3 = new System.Windows.Forms.TabControl();
            this.tabPage3 = new System.Windows.Forms.TabPage();
            this.tabControl2 = new System.Windows.Forms.TabControl();
            this.tabPage2 = new System.Windows.Forms.TabPage();
            this.listView1 = new System.Windows.Forms.ListView();
            this.label15 = new System.Windows.Forms.Label();
            this.btnDel = new System.Windows.Forms.Button();
            this.comboBoxListURL = new System.Windows.Forms.ComboBox();
            this.btnReset = new System.Windows.Forms.Button();
            this.btnStrat = new System.Windows.Forms.Button();
            this.btnSave = new System.Windows.Forms.Button();
            this.adEndTag = new System.Windows.Forms.TextBox();
            this.IsDownloadImage = new System.Windows.Forms.CheckBox();
            this.label17 = new System.Windows.Forms.Label();
            this.label16 = new System.Windows.Forms.Label();
            this.label14 = new System.Windows.Forms.Label();
            this.label13 = new System.Windows.Forms.Label();
            this.txtPagePatt = new System.Windows.Forms.TextBox();
            this.txtUrl = new System.Windows.Forms.TextBox();
            this.name = new System.Windows.Forms.TextBox();
            this.adStartTag = new System.Windows.Forms.TextBox();
            this.endTag = new System.Windows.Forms.TextBox();
            this.startTag = new System.Windows.Forms.TextBox();
            this.floder = new System.Windows.Forms.TextBox();
            this.reg = new System.Windows.Forms.TextBox();
            this.url = new System.Windows.Forms.TextBox();
            this.label7 = new System.Windows.Forms.Label();
            this.chkBoxIsMutiPage = new System.Windows.Forms.CheckBox();
            this.label10 = new System.Windows.Forms.Label();
            this.coder = new System.Windows.Forms.ComboBox();
            this.label9 = new System.Windows.Forms.Label();
            this.label8 = new System.Windows.Forms.Label();
            this.label6 = new System.Windows.Forms.Label();
            this.txtReplace = new System.Windows.Forms.TextBox();
            this.label4 = new System.Windows.Forms.Label();
            this.label3 = new System.Windows.Forms.Label();
            this.label2 = new System.Windows.Forms.Label();
            this.label1 = new System.Windows.Forms.Label();
            this.label18 = new System.Windows.Forms.Label();
            this.txtstartpage = new System.Windows.Forms.NumericUpDown();
            this.txtleijia = new System.Windows.Forms.NumericUpDown();
            this.txtendpage = new System.Windows.Forms.NumericUpDown();
            this.textBoxMaxThread = new System.Windows.Forms.NumericUpDown();
            this.statusBar1 = new System.Windows.Forms.StatusBar();
            // Layout is suspended on every container while the controls are configured
            // and resumed at the end of the method.
            this.tabControl1.SuspendLayout();
            this.tabPage1.SuspendLayout();
            this.tabControl3.SuspendLayout();
            this.tabPage3.SuspendLayout();
            this.tabControl2.SuspendLayout();
            this.tabPage2.SuspendLayout();
            ((System.ComponentModel.ISupportInitialize) (this.txtstartpage)).BeginInit();
            ((System.ComponentModel.ISupportInitialize) (this.txtleijia)).BeginInit();
            ((System.ComponentModel.ISupportInitialize) (this.txtendpage)).BeginInit();
            ((System.ComponentModel.ISupportInitialize) (this.textBoxMaxThread)).BeginInit();
            this.SuspendLayout();
            // 
            // mainMenu1 - menu bar of the form
            // 
            this.mainMenu1.MenuItems.AddRange(new System.Windows.Forms.MenuItem[] {
                       this.menuItem1});
            // 
            // menuItem1 - top-level menu, caption "File"
            // 
            this.menuItem1.Index = 0;
            this.menuItem1.MenuItems.AddRange(new System.Windows.Forms.MenuItem[] {
                       this.menuItem2,
                       this.menuItem3});
            this.menuItem1.Text = "文件";
            // 
            // menuItem2 - caption "New"
            // 
            this.menuItem2.Index = 0;
            this.menuItem2.Text = "新建";
            this.menuItem2.Click += new System.EventHandler(this.menuItem2_Click);
            // 
            // menuItem3 - caption "Exit"
            // 
            this.menuItem3.Index = 1;
            this.menuItem3.Text = "退出";
            this.menuItem3.Click += new System.EventHandler(this.menuItem3_Click);
            // 
            // label11 - caption "First page"
            // 
            this.label11.Location = new System.Drawing.Point(88, 80);
            this.label11.Name = "label11";
            this.label11.Size = new System.Drawing.Size(32, 16);
            this.label11.TabIndex = 27;
            this.label11.Text = "首页";
            // 
            // label12 - caption "Last page"
            // 
            this.label12.Location = new System.Drawing.Point(168, 80);
            this.label12.Name = "label12";
            this.label12.Size = new System.Drawing.Size(32, 16);
            this.label12.TabIndex = 29;
            this.label12.Text = "尾页";
            // 
            // tabControl1 - lower tab strip hosting the result list
            // 
            this.tabControl1.Controls.Add(this.tabPage1);
            this.tabControl1.Location = new System.Drawing.Point(8, 416);
            this.tabControl1.Name = "tabControl1";
            this.tabControl1.SelectedIndex = 0;
            this.tabControl1.Size = new System.Drawing.Size(768, 168);
            this.tabControl1.TabIndex = 31;
            // 
            // tabPage1
            // 
            this.tabPage1.Controls.Add(this.lvResult);
            this.tabPage1.Location = new System.Drawing.Point(4, 21);
            this.tabPage1.Name = "tabPage1";
            this.tabPage1.Size = new System.Drawing.Size(760, 143);
            this.tabPage1.TabIndex = 0;
            this.tabPage1.Text = "Result";
            // 
            // lvResult - result list; its columns are added in btnStrat_Click
            // 
            this.lvResult.Location = new System.Drawing.Point(0, 8);
            this.lvResult.Name = "lvResult";
            this.lvResult.Size = new System.Drawing.Size(760, 144);
            this.lvResult.TabIndex = 0;
            // 
            // tabControl3 - upper tab strip hosting the settings page
            // 
            this.tabControl3.Controls.Add(this.tabPage3);
            this.tabControl3.Location = new System.Drawing.Point(8, 8);
            this.tabControl3.Name = "tabControl3";
            this.tabControl3.SelectedIndex = 0;
            this.tabControl3.Size = new System.Drawing.Size(768, 400);
            this.tabControl3.TabIndex = 32;
            // 
            // tabPage3 - caption "Parameter settings"
            // 
            this.tabPage3.Controls.Add(this.textBoxMaxThread);
            this.tabPage3.Controls.Add(this.txtleijia);
            this.tabPage3.Controls.Add(this.txtendpage);
            this.tabPage3.Controls.Add(this.txtstartpage);
            this.tabPage3.Controls.Add(this.coder);
            this.tabPage3.Controls.Add(this.label1);
            this.tabPage3.Controls.Add(this.adEndTag);
            this.tabPage3.Controls.Add(this.IsDownloadImage);
            this.tabPage3.Controls.Add(this.label17);
            this.tabPage3.Controls.Add(this.label16);
            this.tabPage3.Controls.Add(this.label14);
            this.tabPage3.Controls.Add(this.label13);
            this.tabPage3.Controls.Add(this.txtPagePatt);
            this.tabPage3.Controls.Add(this.txtUrl);
            this.tabPage3.Controls.Add(this.name);
            this.tabPage3.Controls.Add(this.adStartTag);
            this.tabPage3.Controls.Add(this.endTag);
            this.tabPage3.Controls.Add(this.startTag);
            this.tabPage3.Controls.Add(this.floder);
            this.tabPage3.Controls.Add(this.reg);
            this.tabPage3.Controls.Add(this.url);
            this.tabPage3.Controls.Add(this.label7);
            this.tabPage3.Controls.Add(this.chkBoxIsMutiPage);
            this.tabPage3.Controls.Add(this.label10);
            this.tabPage3.Controls.Add(this.label9);
            this.tabPage3.Controls.Add(this.label8);
            this.tabPage3.Controls.Add(this.label6);
            this.tabPage3.Controls.Add(this.txtReplace);
            this.tabPage3.Controls.Add(this.label3);
            this.tabPage3.Controls.Add(this.label2);
            this.tabPage3.Controls.Add(this.tabControl2);
            this.tabPage3.Controls.Add(this.label15);
            this.tabPage3.Controls.Add(this.btnDel);
            this.tabPage3.Controls.Add(this.comboBoxListURL);
            this.tabPage3.Controls.Add(this.btnReset);
            this.tabPage3.Controls.Add(this.btnStrat);
            this.tabPage3.Controls.Add(this.btnSave);
            this.tabPage3.Controls.Add(this.label11);
            this.tabPage3.Controls.Add(this.label12);
            this.tabPage3.Controls.Add(this.label4);
            this.tabPage3.Controls.Add(this.label18);
            this.tabPage3.Location = new System.Drawing.Point(4, 21);
            this.tabPage3.Name = "tabPage3";
            this.tabPage3.Size = new System.Drawing.Size(760, 375);
            this.tabPage3.TabIndex = 0;
            this.tabPage3.Text = "参数设置";
            // 
            // tabControl2 - tab strip nested inside the settings page
            // 
            this.tabControl2.Controls.Add(this.tabPage2);
            this.tabControl2.Location = new System.Drawing.Point(448, 120);
            this.tabControl2.Name = "tabControl2";
            this.tabControl2.SelectedIndex = 0;
            this.tabControl2.Size = new System.Drawing.Size(304, 248);
            this.tabControl2.TabIndex = 46;
            // 
            // tabPage2 - caption "Thread management"
            // 
            this.tabPage2.Controls.Add(this.listView1);
            this.tabPage2.Location = new System.Drawing.Point(4, 21);
            this.tabPage2.Name = "tabPage2";
            this.tabPage2.Size = new System.Drawing.Size(296, 223);
            this.tabPage2.TabIndex = 0;
            this.tabPage2.Text = "线程管理";
            // 
            // listView1 - list on the thread-management page; reconfigured in btnStrat_Click
            // 
            this.listView1.Location = new System.Drawing.Point(0, 0);
            this.listView1.Name = "listView1";
            this.listView1.Size = new System.Drawing.Size(304, 288);
            this.listView1.TabIndex = 0;
            // 
            // label15 - caption "Frequently used URLs"
            // 
            this.label15.Location = new System.Drawing.Point(456, 27);
            this.label15.Name = "label15";
            this.label15.Size = new System.Drawing.Size(56, 23);
            this.label15.TabIndex = 45;
            this.label15.Text = "常用网址";
            // 
            // btnDel - caption "Delete address"
            // 
            this.btnDel.Location = new System.Drawing.Point(544, 91);
            this.btnDel.Name = "btnDel";
            this.btnDel.Size = new System.Drawing.Size(80, 23);
            this.btnDel.TabIndex = 44;
            this.btnDel.Text = "删除地址";
            this.btnDel.Click += new System.EventHandler(this.btnDel_Click);
            // 
            // comboBoxListURL - drop-down next to the "Frequently used URLs" label
            // 
            this.comboBoxListURL.Location = new System.Drawing.Point(520, 19);
            this.comboBoxListURL.Name = "comboBoxListURL";
            this.comboBoxListURL.Size = new System.Drawing.Size(216, 20);
            this.comboBoxListURL.TabIndex = 43;
            this.comboBoxListURL.SelectedIndexChanged += new System.EventHandler(this.comboBoxListURL_SelectedIndexChanged);
            // 
            // btnReset - caption "Cancel"
            // 
            this.btnReset.Location = new System.Drawing.Point(632, 91);
            this.btnReset.Name = "btnReset";
            this.btnReset.Size = new System.Drawing.Size(80, 23);
            this.btnReset.TabIndex = 41;
            this.btnReset.Text = " 取 消 ";
            this.btnReset.Click += new System.EventHandler(this.btnReset_Click);
            // 
            // btnStrat - caption "Start fetching"
            // 
            this.btnStrat.Location = new System.Drawing.Point(632, 59);
            this.btnStrat.Name = "btnStrat";
            this.btnStrat.Size = new System.Drawing.Size(80, 24);
            this.btnStrat.TabIndex = 40;
            this.btnStrat.Text = "开始抓取";
            this.btnStrat.Click += new System.EventHandler(this.btnStrat_Click);
            // 
            // btnSave - caption "Save address"
            // 
            this.btnSave.Location = new System.Drawing.Point(544, 59);
            this.btnSave.Name = "btnSave";
            this.btnSave.Size = new System.Drawing.Size(80, 23);
            this.btnSave.TabIndex = 42;
            this.btnSave.Text = "保存地址";
            this.btnSave.Click += new System.EventHandler(this.btnSave_Click);
            // 
            // adEndTag - placeholder text: "end HTML marker of the advertisement to filter out of the content"
            // 
            this.adEndTag.Location = new System.Drawing.Point(80, 272);
            this.adEndTag.Multiline = true;
            this.adEndTag.Name = "adEndTag";
            this.adEndTag.Size = new System.Drawing.Size(352, 20);
            this.adEndTag.TabIndex = 59;
            this.adEndTag.Text = "内容中需要过滤得广告结束HTML标记";
            // 
            // IsDownloadImage - caption "Download images?"
            // 
            this.IsDownloadImage.Location = new System.Drawing.Point(344, 16);
            this.IsDownloadImage.Name = "IsDownloadImage";
            this.IsDownloadImage.TabIndex = 76;
            this.IsDownloadImage.Text = "是否下载图片";
            // 
            // label17 - caption "Page-number regex"
            // 
            this.label17.Location = new System.Drawing.Point(24, 328);
            this.label17.Name = "label17";
            this.label17.Size = new System.Drawing.Size(56, 16);
            this.label17.TabIndex = 74;
            this.label17.Text = "页码正则";
            // 
            // label16 - caption "Thread count"
            // 
            this.label16.Location = new System.Drawing.Point(336, 80);
            this.label16.Name = "label16";
            this.label16.Size = new System.Drawing.Size(48, 16);
            this.label16.TabIndex = 72;
            this.label16.Text = "线程数";
            // 
            // label14 - caption "Content to filter"
            // 
            this.label14.Location = new System.Drawing.Point(24, 304);
            this.label14.Name = "label14";
            this.label14.Size = new System.Drawing.Size(56, 23);
            this.label14.TabIndex = 71;
            this.label14.Text = "过滤内容";
            // 
            // label13 - caption "Increment"
            // 
            this.label13.Location = new System.Drawing.Point(248, 80);
            this.label13.Name = "label13";
            this.label13.Size = new System.Drawing.Size(48, 16);
            this.label13.TabIndex = 69;
            this.label13.Text = "累加数";
            // 
            // txtPagePatt - placeholder text: "if the content page is itself paginated, enter the content-page paging regex"
            // 
            this.txtPagePatt.Location = new System.Drawing.Point(80, 320);
            this.txtPagePatt.Multiline = true;
            this.txtPagePatt.Name = "txtPagePatt";
            this.txtPagePatt.Size = new System.Drawing.Size(352, 20);
            this.txtPagePatt.TabIndex = 75;
            this.txtPagePatt.Text = "如果内容页还带分页,则写上内容页分页正则";
            // 
            // txtUrl - placeholder text: "use @pageid in place of the page number"; starts disabled
            // 
            this.txtUrl.Enabled = false;
            this.txtUrl.Location = new System.Drawing.Point(80, 112);
            this.txtUrl.Name = "txtUrl";
            this.txtUrl.Size = new System.Drawing.Size(352, 21);
            this.txtUrl.TabIndex = 64;
            this.txtUrl.Text = "请使用@pageid代替页码";
            // 
            // name - text box next to the "Site name" label
            // 
            this.name.Location = new System.Drawing.Point(80, 16);
            this.name.Name = "name";
            this.name.Size = new System.Drawing.Size(136, 21);
            this.name.TabIndex = 61;
            this.name.Text = "";
            // 
            // adStartTag - placeholder text: "start HTML marker of the advertisement to filter out of the content"
            // 
            this.adStartTag.Location = new System.Drawing.Point(80, 248);
            this.adStartTag.Multiline = true;
            this.adStartTag.Name = "adStartTag";
            this.adStartTag.Size = new System.Drawing.Size(352, 20);
            this.adStartTag.TabIndex = 57;
            this.adStartTag.Text = "内容中需要过滤得广告开始HTML标记";
            // 
            // endTag - placeholder text: "end HTML marker of the content you want to fetch"
            // 
            this.endTag.Location = new System.Drawing.Point(80, 224);
            this.endTag.Multiline = true;
            this.endTag.Name = "endTag";
            this.endTag.Size = new System.Drawing.Size(352, 20);
            this.endTag.TabIndex = 55;
            this.endTag.Text = "您要抓取的内容结束HTML标记";
            // 
            // startTag - placeholder text: "start HTML marker of the content you want to fetch"
            // 
            this.startTag.Location = new System.Drawing.Point(80, 200);
            this.startTag.Multiline = true;
            this.startTag.Name = "startTag";
            this.startTag.Size = new System.Drawing.Size(352, 20);
            this.startTag.TabIndex = 53;
            this.startTag.Text = "您要抓取的内容开始HTML标记";
            // 
            // floder - save location, defaulting to <startup path>\ArticleListXml\<today's short date>
            // NOTE(review): ToShortDateString() is culture-dependent; in cultures whose short
            // date contains '/' the default path would gain extra directory levels - confirm intended.
            // 
            this.floder.Location = new System.Drawing.Point(80, 344);
            this.floder.Name = "floder";
            this.floder.Size = new System.Drawing.Size(352, 21);
            this.floder.TabIndex = 51;
            this.floder.Text = Application.StartupPath + "\\ArticleListXml\\" + DateTime.Now.ToShortDateString();
            // 
            // reg - placeholder text: "list-page regular expression"
            // 
            this.reg.Location = new System.Drawing.Point(80, 136);
            this.reg.Multiline = true;
            this.reg.Name = "reg";
            this.reg.Size = new System.Drawing.Size(352, 56);
            this.reg.TabIndex = 48;
            this.reg.Text = "列表页正则表达式";
            // 
            // url - text box next to the "Site URL" label
            // 
            this.url.Location = new System.Drawing.Point(80, 40);
            this.url.Name = "url";
            this.url.Size = new System.Drawing.Size(352, 21);
            this.url.TabIndex = 47;
            this.url.Text = "";
            // 
            // label7 - caption "Ad end tag"
            // 
            this.label7.Location = new System.Drawing.Point(8, 280);
            this.label7.Name = "label7";
            this.label7.Size = new System.Drawing.Size(72, 23);
            this.label7.TabIndex = 58;
            this.label7.Text = "广告结束Tag";
            // 
            // chkBoxIsMutiPage - caption "By pagination"
            // 
            this.chkBoxIsMutiPage.Location = new System.Drawing.Point(24, 72);
            this.chkBoxIsMutiPage.Name = "chkBoxIsMutiPage";
            this.chkBoxIsMutiPage.Size = new System.Drawing.Size(64, 24);
            this.chkBoxIsMutiPage.TabIndex = 65;
            this.chkBoxIsMutiPage.Text = "按分页";
            this.chkBoxIsMutiPage.CheckedChanged += new System.EventHandler(this.chkBoxIsMutiPage_CheckedChanged);
            // 
            // label10 - caption "Generic address"
            // 
            this.label10.Location = new System.Drawing.Point(24, 120);
            this.label10.Name = "label10";
            this.label10.Size = new System.Drawing.Size(80, 23);
            this.label10.TabIndex = 63;
            this.label10.Text = "通用地址";
            // 
            // coder - encoding drop-down; its selected item is read in btnStrat_Click
            // 
            this.coder.Location = new System.Drawing.Point(256, 16);
            this.coder.Name = "coder";
            this.coder.Size = new System.Drawing.Size(80, 20);
            this.coder.TabIndex = 62;
            this.coder.Text = "comboBox1";
            // 
            // label9 - caption "Site name"
            // 
            this.label9.Location = new System.Drawing.Point(24, 24);
            this.label9.Name = "label9";
            this.label9.Size = new System.Drawing.Size(56, 23);
            this.label9.TabIndex = 60;
            this.label9.Text = "网站名称";
            // 
            // label8 - caption "Ad start tag"
            // 
            this.label8.Location = new System.Drawing.Point(8, 256);
            this.label8.Name = "label8";
            this.label8.Size = new System.Drawing.Size(88, 23);
            this.label8.TabIndex = 56;
            this.label8.Text = "广告开始Tag";
            // 
            // label6 - caption "Content end marker"
            // 
            this.label6.Location = new System.Drawing.Point(0, 232);
            this.label6.Name = "label6";
            this.label6.Size = new System.Drawing.Size(80, 23);
            this.label6.TabIndex = 54;
            this.label6.Text = "内容终止标记";
            // 
            // txtReplace - placeholder text: "content to filter out of the article"
            // 
            this.txtReplace.Location = new System.Drawing.Point(80, 296);
            this.txtReplace.Multiline = true;
            this.txtReplace.Name = "txtReplace";
            this.txtReplace.Size = new System.Drawing.Size(352, 20);
            this.txtReplace.TabIndex = 70;
            this.txtReplace.Text = "内容中需要过滤的内容";
            // 
            // label4 - caption "Content start marker"
            // 
            this.label4.Location = new System.Drawing.Point(0, 208);
            this.label4.Name = "label4";
            this.label4.Size = new System.Drawing.Size(80, 23);
            this.label4.TabIndex = 52;
            this.label4.Text = "内容起始标记";
            // 
            // label3 - caption "Save location"
            // 
            this.label3.Location = new System.Drawing.Point(24, 352);
            this.label3.Name = "label3";
            this.label3.Size = new System.Drawing.Size(56, 16);
            this.label3.TabIndex = 50;
            this.label3.Text = "保存地址";
            // 
            // label2 - caption "List regex"
            // 
            this.label2.Location = new System.Drawing.Point(24, 160);
            this.label2.Name = "label2";
            this.label2.Size = new System.Drawing.Size(56, 23);
            this.label2.TabIndex = 49;
            this.label2.Text = "列表正则";
            // 
            // label1 - caption "Site URL"
            // 
            this.label1.Location = new System.Drawing.Point(24, 48);
            this.label1.Name = "label1";
            this.label1.Size = new System.Drawing.Size(56, 23);
            this.label1.TabIndex = 77;
            this.label1.Text = "网站地址";
            // 
            // label18 - caption "Encoding"
            // 
            this.label18.Location = new System.Drawing.Point(224, 24);
            this.label18.Name = "label18";
            this.label18.Size = new System.Drawing.Size(56, 23);
            this.label18.TabIndex = 78;
            this.label18.Text = "编码";
            // 
            // txtstartpage - first page number (maximum 500, initial value 2); starts disabled
            // 
            this.txtstartpage.Enabled = false;
            this.txtstartpage.Location = new System.Drawing.Point(120, 72);
            this.txtstartpage.Maximum = new System.Decimal(new int[] {
                   500,
                   0,
                   0,
                   0});
            this.txtstartpage.Name = "txtstartpage";
            this.txtstartpage.Size = new System.Drawing.Size(40, 21);
            this.txtstartpage.TabIndex = 79;
            this.txtstartpage.Value = new System.Decimal(new int[] {
                    2,
                    0,
                    0,
                    0});
            // 
            // txtleijia - page increment (initial value 1); starts disabled
            // 
            this.txtleijia.Enabled = false;
            this.txtleijia.Location = new System.Drawing.Point(288, 72);
            this.txtleijia.Name = "txtleijia";
            this.txtleijia.Size = new System.Drawing.Size(40, 21);
            this.txtleijia.TabIndex = 81;
            this.txtleijia.Value = new System.Decimal(new int[] {
                 1,
                 0,
                 0,
                 0});
            // 
            // txtendpage - last page number (initial value 20); starts disabled
            // 
            this.txtendpage.Enabled = false;
            this.txtendpage.Location = new System.Drawing.Point(200, 72);
            this.txtendpage.Name = "txtendpage";
            this.txtendpage.Size = new System.Drawing.Size(40, 21);
            this.txtendpage.TabIndex = 80;
            this.txtendpage.Value = new System.Decimal(new int[] {
                  20,
                  0,
                  0,
                  0});
            // 
            // textBoxMaxThread - thread count (initial value 4); starts disabled
            // 
            this.textBoxMaxThread.Enabled = false;
            this.textBoxMaxThread.Location = new System.Drawing.Point(384, 72);
            this.textBoxMaxThread.Name = "textBoxMaxThread";
            this.textBoxMaxThread.Size = new System.Drawing.Size(40, 21);
            this.textBoxMaxThread.TabIndex = 82;
            this.textBoxMaxThread.Value = new System.Decimal(new int[] {
                     4,
                     0,
                     0,
                     0});
            // 
            // statusBar1 - initial text "Ready"
            // 
            this.statusBar1.Location = new System.Drawing.Point(0, 595);
            this.statusBar1.Name = "statusBar1";
            this.statusBar1.Size = new System.Drawing.Size(786, 16);
            this.statusBar1.TabIndex = 33;
            this.statusBar1.Text = "就绪";
            this.statusBar1.Left = 20;
            // 
            // Form1 - fixed-size dialog; title "YESUN automatic article fetching tool v2.0"
            // 
            this.AutoScale = false;
            this.AutoScaleBaseSize = new System.Drawing.Size(6, 14);
            this.ClientSize = new System.Drawing.Size(786, 611);
            this.Controls.Add(this.statusBar1);
            this.Controls.Add(this.tabControl3);
            this.Controls.Add(this.tabControl1);
            this.FormBorderStyle = System.Windows.Forms.FormBorderStyle.FixedDialog;
            this.MaximizeBox = false;
            this.Menu = this.mainMenu1;
            this.Name = "Form1";
            this.Text = "YESUN文章自动抓取工具 v2.0";
            this.tabControl1.ResumeLayout(false);
            this.tabPage1.ResumeLayout(false);
            this.tabControl3.ResumeLayout(false);
            this.tabPage3.ResumeLayout(false);
            this.tabControl2.ResumeLayout(false);
            this.tabPage2.ResumeLayout(false);
            ((System.ComponentModel.ISupportInitialize) (this.txtstartpage)).EndInit();
            ((System.ComponentModel.ISupportInitialize) (this.txtleijia)).EndInit();
            ((System.ComponentModel.ISupportInitialize) (this.txtendpage)).EndInit();
            ((System.ComponentModel.ISupportInitialize) (this.textBoxMaxThread)).EndInit();
            this.ResumeLayout(false);

        }
        #endregion

        /// <summary>
        /// Main entry point of the application: creates the main form and runs the
        /// Windows Forms message loop on it.
        /// </summary>
        [STAThread]
        static void Main()
        {
            Form1 mainForm = new Form1();
            Application.Run(mainForm);
        }


        // Alternative left in the source as commented-out code (auto-reset instead of manual-reset):
        //static AutoResetEvent ev=new AutoResetEvent(false);
        // Wait handle shared by all instances, created in the non-signaled state (initialState = false).
        static ManualResetEvent ev = new ManualResetEvent(false);


        /// <summary>
        /// Handles the Start button: reads the crawl settings from the form,
        /// resets both result lists and launches the crawl.
        /// </summary>
        /// <remarks>
        /// Single-page mode starts one worker thread immediately. Multi-page mode
        /// builds one unstarted worker per page number (substituted for "@pageid"
        /// in the URL template) and leaves starting them to timer_CheckThread.
        /// </remarks>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void btnStrat_Click(object sender, System.EventArgs e)
        {
            // Snapshot the settings entered on the form.
            string url = this.url.Text.Trim();
            string reg = this.reg.Text.Trim();
            string folderpath = this.floder.Text.Trim();
            string startTag = this.startTag.Text.Trim();
            string endTag = this.endTag.Text.Trim();
            string adstartTag = this.adStartTag.Text.Trim();
            string adendTag = this.adEndTag.Text.Trim();

            // Keep the previous encoding when nothing is selected instead of
            // failing with a NullReferenceException.
            if (this.coder.SelectedItem != null)
            {
                encoding = this.coder.SelectedItem.ToString().ToLower();
            }

            // Reset the "articles fetched" counter for the new run.
            j = 0;
            this.comboBoxListURL.Enabled = false;
            try
            {
                int requestedThreads = Int32.Parse(this.textBoxMaxThread.Text);
                // A limit below 1 would prevent any worker from ever starting.
                if (requestedThreads > 0)
                {
                    maxThreadCount = requestedThreads;
                }
            }
            catch (FormatException)
            {
                // Not a number: keep the previous limit.
            }
            catch (OverflowException)
            {
                // Out of range: keep the previous limit.
            }

            // Rebuild the per-article result list.
            this.lvResult.Clear();
            this.lvResult.FullRowSelect = true;
            // NOTE(review): the view is switched twice, as in the original code;
            // presumably this forces the control to re-create its columns - confirm.
            this.lvResult.View = View.LargeIcon;
            this.lvResult.View = View.Details;
            this.lvResult.Columns.Add("编号", 80, HorizontalAlignment.Center);
            this.lvResult.Columns.Add("标题", 300, HorizontalAlignment.Left);
            this.lvResult.Columns.Add("状态", 50, HorizontalAlignment.Left);
            this.lvResult.Columns.Add("大小", 65, HorizontalAlignment.Left);
            this.lvResult.Columns.Add("耗时", 75, HorizontalAlignment.Left);

            // Rebuild the per-thread icon list; the icon file is optional.
            ImageList imgList = new ImageList();
            try
            {
                Image largeImg = Image.FromFile(Application.StartupPath + "\\largeImg.gif");
                imgList.Images.Add(largeImg);
            }
            catch (Exception)
            {
                // Best effort: without the image the items are shown without an icon.
            }
            this.listView1.Columns.Clear();
            this.listView1.Items.Clear();
            this.listView1.LargeImageList = imgList;
            this.listView1.Scrollable = true;
            this.listView1.View = View.LargeIcon;
            this.listView1.GridLines = true;
            this.listView1.FullRowSelect = true;
            this.listView1.Columns.Add("名称", 60, HorizontalAlignment.Left);

            if (this.chkBoxIsMutiPage.Checked)
            {
                // Multi-page crawl.
                if (txtUrl.Text.Trim() != "")
                {
                    int startpage = 0;
                    int endpage = 0;
                    int leijia = 1;
                    try
                    {
                        startpage = Convert.ToInt32(this.txtstartpage.Text);
                        endpage = Convert.ToInt32(this.txtendpage.Text);
                        leijia = Convert.ToInt32(this.txtleijia.Text.Trim());
                    }
                    catch (FormatException)
                    {
                        // Values that could not be read keep their defaults.
                    }
                    catch (OverflowException)
                    {
                        // Values that could not be read keep their defaults.
                    }

                    // A step below 1 would never reach the end page.
                    if (leijia < 1)
                    {
                        leijia = 1;
                    }
                    // A reversed range would ask for a negative array size;
                    // crawl only the start page in that case.
                    if (endpage < startpage)
                    {
                        endpage = startpage;
                    }

                    // One slot per page actually visited, so the array holds no
                    // null entries when the step is larger than 1.
                    int pageCount = (endpage - startpage) / leijia + 1;
                    thread = new Thread[pageCount];
                    // Restart scheduling at the head of the new queue; otherwise a
                    // second run would begin past the end and start nothing.
                    intCurrentThread = 0;

                    this.btnStrat.Enabled = false;

                    int tempInt = 0;
                    this.statusBar1.Text = "正在初始化线程...";
                    for (int i = startpage; i <= endpage; i = i + leijia)
                    {
                        GetArticle ga = new GetArticle();
                        ga.url = txtUrl.Text.Replace("@pageid", i.ToString());
                        ga.reg = reg;
                        ga.pageReg = this.txtPagePatt.Text;
                        ga.folderpath = folderpath;
                        ga.startTag = startTag;
                        ga.endTag = endTag;
                        ga.adStartTag = adstartTag;
                        ga.adEndTag = adendTag;
                        ga.parentForm = this;

                        thread[tempInt++] = new Thread(new ThreadStart(ga.strat));
                    }
                    this.statusBar1.Text = "共" + tempInt + "个线程保存队列中,正在启动线程,请稍候...";

                    // Remove any earlier subscription first so repeated clicks do
                    // not make the scheduler run several times per tick.
                    System.Timers.ElapsedEventHandler scheduler = new System.Timers.ElapsedEventHandler(timer_CheckThread);
                    runable_Timer.Elapsed -= scheduler;
                    runable_Timer.Elapsed += scheduler;
                    runable_Timer.Start();
                }
                else
                {
                    MessageBox.Show("请输入通用网址");
                }
            }
            else
            {
                // Single-page crawl.
                if (url != "")
                {
                    GetArticle ga = new GetArticle();
                    ga.url = url;
                    ga.reg = reg;
                    ga.pageReg = this.txtPagePatt.Text;
                    ga.folderpath = folderpath;
                    ga.startTag = startTag;
                    ga.endTag = endTag;
                    ga.adStartTag = adstartTag;
                    ga.adEndTag = adendTag;
                    ga.parentForm = this;

                    this.btnStrat.Enabled = false;
                    this.statusBar1.Text = "正在准备抓取数据,请稍候...";

                    t = new Thread(new ThreadStart(ga.strat));
                    t.Name = "线程#1";

                    // The worker finds its own entry again through Tag, which
                    // holds the thread's hash code.
                    ListViewItem item = new ListViewItem(t.Name, 0);
                    item.SubItems.Add(t.Name);
                    item.Tag = t.GetHashCode();
                    item.Text = t.Name;
                    item.ForeColor = Color.Red;
                    this.listView1.Items.AddRange(new ListViewItem[] { item });
                    // Scrolling only has an effect once the item is in the list.
                    item.EnsureVisible();

                    t.Priority = ThreadPriority.Lowest;
                    t.Start();
                }
                else
                {
                    MessageBox.Show("请输入网址");
                }
            }
        }


        /// <summary>
        /// Timer callback that starts the next queued worker thread while fewer
        /// than maxThreadCount workers are running.
        /// </summary>
        /// <remarks>
        /// The timer is stopped for the duration of the callback and restarted
        /// with a 3 second interval afterwards. System.Timers.Timer raises
        /// Elapsed on a thread-pool thread, so the call is forwarded to the UI
        /// thread before any control is touched.
        /// </remarks>
        /// <param name="sender">The System.Timers.Timer that fired.</param>
        /// <param name="e">Elapsed event data; only forwarded, not read.</param>
        void timer_CheckThread(object sender, System.Timers.ElapsedEventArgs e)
        {
            System.Timers.Timer initTimer = (System.Timers.Timer) sender;
            initTimer.Stop();

            if (this.IsDisposed)
            {
                // The window is gone; there is nothing left to schedule.
                return;
            }

            if (this.InvokeRequired)
            {
                // Re-run this handler on the thread that owns the controls.
                this.Invoke(new System.Timers.ElapsedEventHandler(timer_CheckThread), new object[] { sender, e });
                return;
            }

            try
            {
                if (currentThreadCount < maxThreadCount)
                {
                    bool hasQueuedThread =
                        thread != null
                        && intCurrentThread >= 0
                        && intCurrentThread < thread.Length
                        && thread[intCurrentThread] != null
                        && thread[intCurrentThread].ThreadState == ThreadState.Unstarted;

                    if (hasQueuedThread)
                    {
                        Thread next = thread[intCurrentThread];
                        string label = "线程#" + (intCurrentThread + 1);

                        next.Priority = ThreadPriority.Lowest;
                        next.Name = label;

                        // Workers decrement this counter from their own threads,
                        // so the increment has to be atomic.
                        Interlocked.Increment(ref currentThreadCount);

                        this.statusBar1.Text = label + "已开始执行...";
                        next.Start();

                        // Tag links the list entry to the thread's hash code so
                        // the worker can find its own entry later.
                        ListViewItem item = new ListViewItem(label, 0);
                        item.Tag = next.GetHashCode();
                        item.SubItems.Add(label);
                        item.Text = label;
                        item.ForeColor = Color.Red;
                        this.listView1.Items.AddRange(new ListViewItem[] { item });
                        // Scrolling only has an effect once the item is in the list.
                        item.EnsureVisible();

                        intCurrentThread++;
                    }
                    else
                    {
                        // Queue exhausted, or the next entry cannot be started.
                        istrue = false;
                    }
                }
            }
            catch (Exception)
            {
                // Same fallback as before: flag that scheduling could not go on.
                istrue = false;
            }
            finally
            {
                initTimer.Interval = 3 * 1000;
                initTimer.Start();
            }
        }



        /// <summary>
        /// Refills the encoding selector with the supported encodings and
        /// selects the first one ("gb2312").
        /// </summary>
        void BindEncode()
        {
            // Drop the existing entries, last one first.
            while (this.coder.Items.Count > 0)
            {
                this.coder.Items.RemoveAt(this.coder.Items.Count - 1);
            }

            string[] supportedEncodings = new string[] { "gb2312", "utf-8" };
            foreach (string encodingName in supportedEncodings)
            {
                this.coder.Items.Add(encodingName);
            }

            this.coder.SelectedIndex = 0;
        }

        /// <summary>
        /// Reloads the saved-site drop-down from URL.xml in the application's
        /// start-up directory and selects the first entry.
        /// </summary>
        /// <remarks>
        /// Loading is best effort: a missing or unreadable file leaves the list
        /// empty, and an entry whose id is not an integer is skipped instead of
        /// aborting the whole load.
        /// </remarks>
        void BindLink()
        {
            this.comboBoxListURL.Items.Clear();

            string configPath = Path.Combine(Application.StartupPath, "URL.xml");
            if (!File.Exists(configPath))
            {
                // Nothing has been saved yet.
                return;
            }

            XmlDataDocument xmlDoc = new XmlDataDocument();
            try
            {
                xmlDoc.Load(configPath);
            }
            catch (XmlException)
            {
                // Malformed file: leave the list empty.
                return;
            }
            catch (IOException)
            {
                return;
            }
            catch (UnauthorizedAccessException)
            {
                return;
            }

            XmlNodeList xmlNodes = xmlDoc.SelectNodes("//root/url");
            foreach (XmlNode node in xmlNodes)
            {
                XmlElement xmlElem = node as XmlElement;
                if (xmlElem == null)
                {
                    continue;
                }

                MyItem myitem = new MyItem();
                try
                {
                    myitem.id = Convert.ToInt32(xmlElem.GetAttribute("id"));
                }
                catch (FormatException)
                {
                    // Entry without a usable id; it could not be deleted later.
                    continue;
                }
                catch (OverflowException)
                {
                    continue;
                }

                // GetAttribute returns an empty string for a missing attribute,
                // so none of these reads needs its own guard.
                myitem.name = Convert.ToString(xmlElem.GetAttribute("name"));
                myitem.regex = b2a(xmlElem.GetAttribute("regex"));
                myitem.url = b2a(xmlElem.GetAttribute("url"));
                myitem.starttag = b2a(xmlElem.GetAttribute("starttag"));
                myitem.endtag = b2a(xmlElem.GetAttribute("endtag"));
                myitem.adstarttag = b2a(xmlElem.GetAttribute("adstarttag"));
                myitem.adendtag = b2a(xmlElem.GetAttribute("adendtag"));
                myitem.pageReg = b2a(xmlElem.GetAttribute("pageReg"));

                this.comboBoxListURL.Items.Add(myitem);
            }

            // Selecting index 0 on an empty list would throw.
            if (this.comboBoxListURL.Items.Count > 0)
            {
                this.comboBoxListURL.SelectedIndex = 0;
            }
        }


        /// <summary>
        /// Passes a value through the angle-bracket replacements applied before
        /// it is written to URL.xml.
        /// </summary>
        /// <remarks>
        /// NOTE(review): as written, each replacement maps a character to
        /// itself, so the text comes back unchanged - confirm whether entity
        /// forms were intended here.
        /// </remarks>
        /// <param name="str">Text to convert; must not be null.</param>
        /// <returns>The converted text.</returns>
        string a2b(string str)
        {
            return str.Replace("<", "<").Replace(">", ">");
        }

        /// <summary>
        /// Passes a value through the angle-bracket replacements applied after
        /// it is read from URL.xml.
        /// </summary>
        /// <remarks>
        /// NOTE(review): as written, each replacement maps a character to
        /// itself, so the text comes back unchanged - confirm whether entity
        /// forms were intended here.
        /// </remarks>
        /// <param name="str">Text to convert; must not be null.</param>
        /// <returns>The converted text.</returns>
        string b2a(string str)
        {
            return str.Replace("<", "<").Replace(">", ">");
        }


        /// <summary>
        /// Handles the Stop button: aborts the running crawl and re-enables the
        /// controls that were locked when it started.
        /// </summary>
        /// <remarks>
        /// In multi-page mode every queued worker is aborted, the scheduling
        /// timer is stopped and the counters are reset; in single-page mode only
        /// the single worker is aborted. The controls are re-enabled even when
        /// an individual abort fails.
        /// </remarks>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void btnReset_Click(object sender, System.EventArgs e)
        {
            istrue = false;
            try
            {
                if (this.chkBoxIsMutiPage.Checked)
                {
                    // Stop handing out new workers before aborting the queue.
                    runable_Timer.Stop();

                    if (thread != null)
                    {
                        // Walk the real queue length rather than a fixed count,
                        // and skip slots that were never filled.
                        for (int i = 0; i < thread.Length; i++)
                        {
                            Thread worker = thread[i];
                            if (worker == null)
                            {
                                continue;
                            }
                            try
                            {
                                worker.Abort();
                                this.statusBar1.Text = "线程" + worker.Name + "已终止!";
                            }
                            catch (ThreadStateException)
                            {
                                // This worker cannot be aborted in its current
                                // state; carry on with the others.
                            }
                            catch (System.Security.SecurityException)
                            {
                                // Not permitted to abort; carry on with the others.
                            }
                        }
                    }

                    // Reset the counters for the next run.
                    j = 0;
                    currentThreadCount = 0;
                    intCurrentThread = 0;
                }
                else if (t != null)
                {
                    t.Abort();
                }

                this.statusBar1.Text = "已终止搜索!";
            }
            catch (ThreadStateException)
            {
                // The single worker could not be aborted; still unlock the UI.
            }
            catch (System.Security.SecurityException)
            {
                // Not permitted to abort; still unlock the UI.
            }
            finally
            {
                this.comboBoxListURL.Enabled = true;
                this.btnStrat.Enabled = true;
                this.btnReset.Enabled = true;
            }
        }


        /// <summary>
        /// Handles the Save button: appends the site settings currently shown on
        /// the form to URL.xml and reloads the saved-site list.
        /// </summary>
        /// <remarks>
        /// The new entry gets an id one higher than the largest id already in
        /// the file. Using the entry count instead would reuse an id after a
        /// deletion, and deletion looks entries up by id.
        /// </remarks>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void btnSave_Click(object sender, System.EventArgs e)
        {
            try
            {
                string configPath = Application.StartupPath + "/URL.xml";

                XmlDataDocument xmlDoc = new XmlDataDocument();
                if (File.Exists(configPath))
                {
                    xmlDoc.Load(configPath);
                }
                else
                {
                    xmlDoc.LoadXml("<root />");
                }

                // Find the first id that is not in use yet.
                int nextId = 0;
                foreach (XmlNode existingNode in xmlDoc.SelectNodes("/root/url"))
                {
                    XmlElement existingElem = existingNode as XmlElement;
                    if (existingElem == null)
                    {
                        continue;
                    }
                    try
                    {
                        int existingId = Convert.ToInt32(existingElem.GetAttribute("id"));
                        if (existingId >= nextId)
                        {
                            nextId = existingId + 1;
                        }
                    }
                    catch (FormatException)
                    {
                        // Entry without a numeric id: it cannot collide.
                    }
                    catch (OverflowException)
                    {
                        // Entry with an out-of-range id: it cannot collide.
                    }
                }

                XmlElement xmlElem = xmlDoc.CreateElement("url");
                xmlElem.SetAttribute("id", nextId.ToString());
                xmlElem.SetAttribute("name", a2b(this.name.Text));
                xmlElem.SetAttribute("url", a2b(this.url.Text));
                xmlElem.SetAttribute("regex", a2b(this.reg.Text));
                xmlElem.SetAttribute("starttag", a2b(this.startTag.Text));
                xmlElem.SetAttribute("endtag", a2b(this.endTag.Text));
                xmlElem.SetAttribute("adstarttag", a2b(this.adStartTag.Text));
                xmlElem.SetAttribute("adendtag", a2b(this.adEndTag.Text));
                xmlElem.SetAttribute("pageReg", a2b(this.txtPagePatt.Text));
                xmlDoc.DocumentElement.AppendChild(xmlElem);
                xmlDoc.Save(configPath);
                MessageBox.Show("操作成功!");

                // Refresh the list and select the entry that was just added.
                this.BindLink();
                this.comboBoxListURL.SelectedIndex = this.comboBoxListURL.Items.Count - 1;
            }
            catch (Exception ex)
            {
                MessageBox.Show(ex.ToString());
            }
        }


        /// <summary>
        /// Handles the "new site" menu item by blanking every site-definition
        /// input on the form.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void menuItem2_Click(object sender, System.EventArgs e)
        {
            const string blank = "";

            // Site identity and list-page pattern.
            name.Text = blank;
            url.Text = blank;
            reg.Text = blank;

            // Content and advertisement markers.
            startTag.Text = blank;
            endTag.Text = blank;
            adStartTag.Text = blank;
            adEndTag.Text = blank;

            // Pagination pattern and replacement text.
            txtPagePatt.Text = blank;
            txtReplace.Text = blank;
        }

        /// <summary>
        /// Handles the "exit" menu item by asking the application to exit.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void menuItem3_Click(object sender, System.EventArgs e)
        {
            Application.Exit();
        }

        /// <summary>
        /// Handles toggling of the multi-page check box: flips the enabled state
        /// of the multi-page inputs and proposes a URL template, with "@pageid"
        /// in place of the page number, derived from the single-page URL.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void chkBoxIsMutiPage_CheckedChanged(object sender, System.EventArgs e)
        {
            // NOTE(review): each input's state is inverted rather than set from
            // Checked, so this relies on the designer's initial states - confirm.
            this.txtUrl.Enabled = !this.txtUrl.Enabled;
            this.txtstartpage.Enabled = !this.txtstartpage.Enabled;
            this.txtendpage.Enabled = !this.txtendpage.Enabled;
            this.txtleijia.Enabled = !this.txtleijia.Enabled;
            this.textBoxMaxThread.Enabled = !this.textBoxMaxThread.Enabled;

            this.txtUrl.Text = InsertPagePlaceholder(this.url.Text.Trim());
        }

        /// <summary>
        /// Derives a multi-page URL template from a single-page URL by putting
        /// "@pageid" where the page number appears to be.
        /// </summary>
        /// <remarks>
        /// Rules, in order: the value of a "page=" and/or "pageid=" query
        /// parameter; otherwise whatever lies between the last '_' and the file
        /// extension; otherwise whatever lies between the last "index" and the
        /// file extension. Keyword matching ignores case. A URL matching none of
        /// the rules is returned unchanged.
        /// </remarks>
        /// <param name="pageUrl">Single-page URL, already trimmed.</param>
        /// <returns>The URL template.</returns>
        private static string InsertPagePlaceholder(string pageUrl)
        {
            System.Globalization.CompareInfo comparer = System.Globalization.CultureInfo.InvariantCulture.CompareInfo;
            System.Globalization.CompareOptions ignoreCase = System.Globalization.CompareOptions.IgnoreCase;

            string result = pageUrl;

            // Rule 1: replace the value of a page query parameter.
            string[] queryKeys = new string[] { "page=", "pageid=" };
            foreach (string key in queryKeys)
            {
                int keyPos = comparer.IndexOf(result, key, ignoreCase);
                if (keyPos < 0)
                {
                    continue;
                }
                int valueStart = keyPos + key.Length;
                int nextParam = result.IndexOf('&', valueStart);
                // The parameter may be the last one, with no '&' after it.
                string tail = nextParam < 0 ? "" : result.Substring(nextParam);
                result = result.Substring(0, valueStart) + "@pageid" + tail;
            }

            // Rule 2: "name_3.html" becomes "name_@pageid.html".
            if (result == pageUrl)
            {
                int underscorePos = result.LastIndexOf('_');
                if (underscorePos > -1)
                {
                    int dotPos = result.LastIndexOf('.');
                    // Only a dot after the '_' is a file extension; an earlier
                    // one belongs to the host name or the path.
                    string extension = dotPos > underscorePos ? result.Substring(dotPos) : "";
                    result = result.Substring(0, underscorePos + 1) + "@pageid" + extension;
                }
            }

            // Rule 3: "index3.html" becomes "index@pageid.html".
            if (result == pageUrl)
            {
                int indexPos = comparer.LastIndexOf(result, "index", ignoreCase);
                if (indexPos > -1)
                {
                    int afterIndex = indexPos + 5;
                    int dotPos = result.LastIndexOf('.');
                    string extension = dotPos >= afterIndex ? result.Substring(dotPos) : "";
                    result = result.Substring(0, afterIndex) + "@pageid" + extension;
                }
            }

            return result;
        }



        /// <summary>
        /// Handles selection of a saved site: copies the selected entry's
        /// settings into the form's input fields.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void comboBoxListURL_SelectedIndexChanged(object sender, System.EventArgs e)
        {
            MyItem myitem = comboBoxListURL.SelectedItem as MyItem;
            if (myitem == null)
            {
                // Nothing selected (for example while the list is being
                // rebuilt): leave the inputs as they are.
                return;
            }

            this.url.Text = myitem.url;
            this.name.Text = myitem.name;
            this.reg.Text = myitem.regex;
            this.startTag.Text = myitem.starttag;
            this.endTag.Text = myitem.endtag;
            this.adStartTag.Text = myitem.adstarttag;
            this.adEndTag.Text = myitem.adendtag;
            this.txtPagePatt.Text = myitem.pageReg;
        }


        /// <summary>
        /// Handles the Delete button: removes the selected site from URL.xml
        /// and reloads the saved-site list.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void btnDel_Click(object sender, System.EventArgs e)
        {
            try
            {
                MyItem myitem = comboBoxListURL.SelectedItem as MyItem;

                if (myitem != null)
                {
                    string configPath = Application.StartupPath + "/URL.xml";

                    XmlDataDocument xmlDoc = new XmlDataDocument();
                    if (File.Exists(configPath))
                    {
                        xmlDoc.Load(configPath);
                    }
                    else
                    {
                        xmlDoc.LoadXml("<root />");
                    }

                    XmlNode node = xmlDoc.SelectSingleNode("/root/url[@id=" + myitem.id + "]");
                    if (node != null && node.ParentNode != null)
                    {
                        node.ParentNode.RemoveChild(node);
                        xmlDoc.Save(configPath);
                        MessageBox.Show("操作成功!");
                    }
                    else
                    {
                        // The entry is no longer in the file; say so instead of
                        // surfacing the exception RemoveChild(null) would raise.
                        MessageBox.Show("未找到要删除的网站!");
                    }
                }

                BindLink();
            }
            catch (Exception ex)
            {
                MessageBox.Show(ex.Message);
            }
        }

    }


    /// <summary>
    /// 抓取文章类
    /// </summary>
    public class GetArticle
    {
        #region "属 性"

        /// <summary>Form that owns this crawl; strat() reports progress through its controls.</summary>
        public Form1 parentForm = null;

        private string _url = "";
        /// <summary>Address of the list page that strat() downloads.</summary>
        public string url
        {
            get
            {
                return _url;
            }
            set
            {
                _url = value;
            }
        }

        private string _reg = "";
        /// <summary>
        /// Regular expression applied to the list page; strat() reads the
        /// groups "topic", "url", "vdatetime" and "catalogname" from each match.
        /// </summary>
        public string reg
        {
            get
            {
                return _reg;
            }
            set
            {
                _reg = value;
            }
        }

        private string _reg1 = "";
        /// <summary>Additional pattern. NOTE(review): not read anywhere in the visible code - confirm it is used.</summary>
        public string reg1
        {
            get
            {
                return _reg1;
            }
            set
            {
                _reg1 = value;
            }
        }

        private string _pageReg = "";
        /// <summary>
        /// Regular expression that finds pagination links inside an article
        /// page (groups "url" and "page"); empty disables pagination handling.
        /// </summary>
        public string pageReg
        {
            get
            {
                return _pageReg;
            }
            set
            {
                _pageReg = value;
            }
        }

        private string _folderpath = "";
        /// <summary>Directory the result XML file is written to.</summary>
        public string folderpath
        {
            get
            {
                return _folderpath;
            }
            set
            {
                _folderpath = value;
            }
        }

        private string _startTag = "";
        /// <summary>Marker after which the article content begins (used by FilterContent).</summary>
        public string startTag
        {
            get
            {
                return _startTag;
            }
            set
            {
                _startTag = value;
            }
        }

        private string _endTag = "";
        /// <summary>Marker at which the article content ends (used by FilterContent).</summary>
        public string endTag
        {
            get
            {
                return _endTag;
            }
            set
            {
                _endTag = value;
            }
        }

        private string _adStartTag = "";
        /// <summary>Marker that opens a span FilterContent cuts out of the content.</summary>
        public string adStartTag
        {
            get
            {
                return _adStartTag;
            }
            set
            {
                _adStartTag = value;
            }
        }

        private string _adEndTag = "";
        /// <summary>Marker that closes a span FilterContent cuts out of the content.</summary>
        public string adEndTag
        {
            get
            {
                return _adEndTag;
            }
            set
            {
                _adEndTag = value;
            }
        }

        #endregion


        /// <summary>
        /// Thread entry point: downloads the list page at <see cref="url"/>,
        /// extracts every article link matched by <see cref="reg"/>, downloads
        /// and filters each article (following pagination links when
        /// <see cref="pageReg"/> is set) and saves the results as one XML file
        /// in <see cref="folderpath"/>.
        /// </summary>
        /// <remarks>
        /// Progress is reported through the controls of <see cref="parentForm"/>.
        /// Whatever happens, the worker releases its slot in
        /// Form1.currentThreadCount and re-enables the Start button and the site
        /// list, so a failed download cannot stall the scheduler or lock the UI.
        /// </remarks>
        public void strat()
        {
            // Set once this worker has released its slot in the thread counter.
            bool slotReleased = false;
            try
            {
                if (this.folderpath == "")
                {
                    // Default to the current user's desktop, wherever this
                    // Windows version and language keep it.
                    folderpath = Environment.GetFolderPath(Environment.SpecialFolder.DesktopDirectory);
                }
                if (!Directory.Exists(folderpath))
                {
                    Directory.CreateDirectory(folderpath);
                }
                if (url == "")
                {
                    return;
                }
                if (reg == "")
                {
                    reg = ".*";
                }

                string content = getWebContent(url);
                // Normalize quotes so list patterns only need to deal with '.
                content = content.Replace("\"", "'");

                // Pattern that picks the article links out of the list page.
                Regex regex = new Regex(reg, RegexOptions.Compiled | RegexOptions.IgnoreCase);

                XmlDataDocument xmlDoc = new XmlDataDocument();
                xmlDoc.LoadXml("<root />");
                XmlElement xmlElem = xmlDoc.CreateElement("ArticleList");

                int successCount = 1;
                int i = 1;
                for (Match mc = regex.Match(content); mc.Success; mc = mc.NextMatch(), i++)
                {
                    DateTime startTime = DateTime.Now;
                    string topic = "";
                    // Stays false when the article fails or turns out empty.
                    bool stored = false;
                    try
                    {
                        topic = mc.Groups["topic"].Value.Trim();
                        string href = mc.Groups["url"].Value;
                        string vdatetime = mc.Groups["vdatetime"].Value;
                        string catalogname = mc.Groups["catalogname"].Value;

                        string RealURL = ResolveLink(this.url, href);

                        // Keep the unfiltered page: pagination links are
                        // searched in it, not in the filtered content.
                        string contentString = getWebContent(RealURL);
                        string tempstr = FilterContent(contentString);

                        // Does the article continue on further pages?
                        if (this.pageReg != "")
                        {
                            try
                            {
                                Regex regexContent = new Regex(this.pageReg);
                                for (Match mcContent = regexContent.Match(contentString); mcContent.Success; mcContent = mcContent.NextMatch())
                                {
                                    string pageUrl = mcContent.Groups["url"].Value;
                                    int page = 1;
                                    if (mcContent.Groups["page"].Value != "")
                                    {
                                        try
                                        {
                                            page = Convert.ToInt32(mcContent.Groups["page"].Value);
                                        }
                                        catch (FormatException)
                                        {
                                            // Not a number: treat it as page 1.
                                        }
                                        catch (OverflowException)
                                        {
                                            // Out of range: treat it as page 1.
                                        }
                                    }

                                    string pageRealURL = ResolveLink(RealURL, pageUrl);

                                    if (page > 1)
                                    {
                                        // Page 1 is already in tempstr; append
                                        // the later pages and strip the
                                        // pagination links from the result.
                                        string nextPageContent = FilterContent(getWebContent(pageRealURL));
                                        tempstr += "<br/>" + nextPageContent;
                                        tempstr = Regex.Replace(tempstr, this.pageReg, "");
                                    }
                                }
                            }
                            catch (Exception)
                            {
                                // Best effort: keep the pages collected so far.
                            }
                        }

                        if (tempstr != "" && topic != "")
                        {
                            // Add the article to the result document.
                            XmlElement xmlElemArt = xmlDoc.CreateElement("Article");
                            xmlElemArt.SetAttribute("topic", topic);
                            xmlElemArt.SetAttribute("href", href);
                            xmlElemArt.SetAttribute("comefrom", this.parentForm.name.Text);
                            xmlElemArt.SetAttribute("vdatetime", vdatetime);
                            xmlElemArt.SetAttribute("catalogname", catalogname);
                            xmlElemArt.InnerText = tempstr;
                            xmlElem.AppendChild(xmlElemArt);

                            this.parentForm.j++;

                            this.parentForm.statusBar1.ForeColor = Color.Black;
                            this.parentForm.statusBar1.Text = "成功抓取" + this.parentForm.j + "篇";

                            ListViewItem item1 = new ListViewItem(Thread.CurrentThread.Name + "->" + i.ToString(), 0);
                            item1.SubItems.Add(topic);
                            item1.SubItems.Add("True");
                            item1.SubItems.Add(tempstr.Length + "byte");
                            TimeSpan ts = DateTime.Now - startTime;
                            item1.SubItems.Add(ts.TotalSeconds + "秒");

                            this.parentForm.lvResult.Items.AddRange(new ListViewItem[] { item1 });
                            this.parentForm.lvResult.Items[this.parentForm.lvResult.Items.Count - 1].EnsureVisible();
                            this.parentForm.lvResult.TopItem.Selected = true;

                            // Show the running success count on this thread's
                            // entry, found through the hash code kept in Tag.
                            for (int k = 0; k < this.parentForm.listView1.Items.Count; k++)
                            {
                                if ((int) this.parentForm.listView1.Items[k].Tag == Thread.CurrentThread.GetHashCode())
                                {
                                    this.parentForm.listView1.Items[k].Text = Thread.CurrentThread.Name + "-" + successCount + "";
                                }
                            }
                            successCount++;
                            stored = true;
                        }
                    }
                    catch (Exception)
                    {
                        // One bad article must not end the crawl; it is
                        // reported just below because stored is still false.
                    }

                    if (!stored)
                    {
                        this.parentForm.statusBar1.ForeColor = Color.Red;
                        this.parentForm.statusBar1.Text = "抓取失败  ====> " + topic;
                        ListViewItem item1 = new ListViewItem(Thread.CurrentThread.Name + "->" + i.ToString(), 0);
                        item1.SubItems.Add(topic);
                        item1.SubItems.Add("False");
                        item1.SubItems.Add("");
                        item1.SubItems.Add("");
                        this.parentForm.lvResult.Items.AddRange(new ListViewItem[] { item1 });
                        this.parentForm.lvResult.Items[this.parentForm.lvResult.Items.Count - 1].EnsureVisible();
                        this.parentForm.statusBar1.ForeColor = Color.Red;
                    }
                }
                xmlDoc.DocumentElement.AppendChild(xmlElem);

                // "<site>_<date>_<n>.xml" with the first n that is still free.
                // The date is formatted independently of the current culture
                // because a short-date pattern may contain '/', which is not
                // valid in a file name.
                string filename = this.parentForm.name.Text + "_" + DateTime.Now.ToString("yyyy-M-d", System.Globalization.CultureInfo.InvariantCulture);
                int filenameNum = 1;
                while (File.Exists(folderpath + "\\" + filename + "_" + filenameNum + ".xml"))
                {
                    filenameNum++;
                }
                filename = filename + "_" + filenameNum + ".xml";

                try
                {
                    xmlDoc.Save(folderpath + "\\" + filename);
                }
                catch (Exception)
                {
                    this.parentForm.statusBar1.Text = "保存Xml失败 ==> " + folderpath + "\\" + filename;
                }

                this.parentForm.statusBar1.ForeColor = Color.Black;

                // Give the slot back so the scheduler can start the next worker.
                ReleaseThreadSlot();
                slotReleased = true;

                this.parentForm.statusBar1.Text = Thread.CurrentThread.Name + " 执行完毕!";
                // Mark this thread's list entry as finished.
                for (int k = 0; k < this.parentForm.listView1.Items.Count; k++)
                {
                    if ((int) this.parentForm.listView1.Items[k].Tag == Thread.CurrentThread.GetHashCode())
                    {
                        this.parentForm.listView1.Items[k].ForeColor = Color.Black;
                    }
                }
                this.parentForm.statusBar1.ForeColor = Color.Green;
                this.parentForm.statusBar1.Text = Thread.CurrentThread.Name + " 抓取完毕,共抓取" + this.parentForm.j + "篇,数据已保存在" + folderpath + "\\" + filename;
            }
            catch (ThreadAbortException)
            {
                // Stopped by the user: do not report this as a failed crawl.
                throw;
            }
            catch (Exception)
            {
                // The list page could not be fetched or processed.
                this.parentForm.statusBar1.ForeColor = Color.Red;
                this.parentForm.statusBar1.Text = "抓取失败  ====> " + url;
            }
            finally
            {
                if (!slotReleased)
                {
                    ReleaseThreadSlot();
                }
                this.parentForm.btnStrat.Enabled = true;
                this.parentForm.comboBoxListURL.Enabled = true;
            }
        }

        /// <summary>
        /// Atomically lowers Form1.currentThreadCount by one without letting it
        /// drop below zero.
        /// </summary>
        private static void ReleaseThreadSlot()
        {
            while (true)
            {
                int seen = Form1.currentThreadCount;
                if (seen <= 0)
                {
                    // Nothing to give back, for example a single-page run that
                    // never took a slot, or the counter was already reset.
                    return;
                }
                if (Interlocked.CompareExchange(ref Form1.currentThreadCount, seen - 1, seen) == seen)
                {
                    return;
                }
            }
        }

        /// <summary>
        /// Turns a link found on a page into an absolute URL.
        /// </summary>
        /// <remarks>
        /// A link containing "http" is returned as is; a link starting with "/"
        /// is appended to the host of <paramref name="baseUrl"/>; any other link
        /// is treated as relative to the directory of <paramref name="baseUrl"/>.
        /// Throws when <paramref name="baseUrl"/> has no "http://host/" prefix
        /// (root-relative links) or contains no '/' (relative links).
        /// </remarks>
        /// <param name="baseUrl">Address of the page the link was found on.</param>
        /// <param name="link">The link as it appears in the page.</param>
        /// <returns>The absolute URL.</returns>
        private static string ResolveLink(string baseUrl, string link)
        {
            if (link.IndexOf("http") > -1)
            {
                return link;
            }
            if (link.StartsWith("/"))
            {
                Regex hostPattern = new Regex(@"^http://(?<d>[^/]+)/", RegexOptions.Compiled);
                return "http://" + hostPattern.Match(baseUrl).Result("${d}") + link;
            }
            return baseUrl.Substring(0, baseUrl.LastIndexOf('/')) + "/" + link;
        }


        /// <summary>
        /// Extracts the article body from a downloaded page and cleans it up.
        /// Steps, in order: keep only the text between <c>startTag</c> and <c>endTag</c>;
        /// remove every <c>adStartTag</c>…<c>adEndTag</c> section (when both are set);
        /// turn relative <c>src</c> attribute values into absolute URLs based on <c>url</c>;
        /// strip the literal text entered in <c>parentForm.txtReplace</c>; and, when
        /// <c>parentForm.IsDownloadImage</c> is checked, hand every <c>src</c> value to
        /// <see cref="DownloadFile"/>.
        /// </summary>
        /// <param name="tempstr">Raw HTML of the content page.</param>
        /// <returns>
        /// The filtered HTML, or an empty string when the start/end tag is not found
        /// or any step throws.
        /// </returns>
        public string FilterContent(string tempstr)
        {
            try
            {
                // Cut the page down to the configured body region. A missing tag is
                // reported as "no content" instead of slicing at a bogus offset.
                int bodyStart = tempstr.IndexOf(this.startTag);
                if (bodyStart < 0)
                {
                    return "";
                }
                tempstr = tempstr.Substring(bodyStart + this.startTag.Length);

                int bodyEnd = tempstr.IndexOf(this.endTag);
                if (bodyEnd < 0)
                {
                    return "";
                }
                tempstr = tempstr.Substring(0, bodyEnd);

                // Remove ad sections. Every pass deletes at least adEndTag.Length
                // characters, so the loop always terminates.
                if (this.adStartTag != null && this.adStartTag != ""
                    && this.adEndTag != null && this.adEndTag != "")
                {
                    int adStart = tempstr.IndexOf(this.adStartTag);
                    while (adStart >= 0)
                    {
                        int adEnd = tempstr.IndexOf(this.adEndTag, adStart);
                        if (adEnd < 0)
                        {
                            // Unterminated ad section: leave the remaining text alone.
                            break;
                        }
                        tempstr = tempstr.Remove(adStart, adEnd + this.adEndTag.Length - adStart);
                        adStart = tempstr.IndexOf(this.adStartTag);
                    }
                }

                // Resolve relative image/file paths, each src judged on its own.
                tempstr = MakeSourcesAbsolute(tempstr, this.url);

                // Strip junk text: plain literal replacement, no regex support.
                // NOTE(review): this reads form controls directly; if this method runs
                // on a worker thread, confirm that cross-thread access is acceptable.
                string junk = this.parentForm.txtReplace.Text;
                if (junk != "")
                {
                    tempstr = tempstr.Replace(junk, "");
                }

                // Optionally download every referenced image/file.
                if (this.parentForm.IsDownloadImage.Checked)
                {
                    for (Match src = SrcAttributeRegex.Match(tempstr); src.Success; src = src.NextMatch())
                    {
                        DownloadFile(src.Groups["image"].Value);
                        // TODO: rewrite the src in the content to point at the local copy.
                    }
                }

                return tempstr;
            }
            catch (Exception ex)
            {
                // Best effort: callers receive an empty string when filtering fails.
                Console.WriteLine(ex.Message);
                return "";
            }
        }

        /// <summary>
        /// Matches a <c>src=</c> attribute whose value is double-quoted, single-quoted
        /// or unquoted; the bare value is captured in the group named "image".
        /// </summary>
        private static readonly Regex SrcAttributeRegex = new Regex(
            "src=(?:\"(?<image>[^\"]*)\"|'(?<image>[^']*)'|(?<image>[^\\s\"'>]+))",
            RegexOptions.IgnoreCase);

        /// <summary>
        /// Matches a leading URI scheme such as "http:" or "data:"; used to recognise
        /// values that are already absolute.
        /// </summary>
        private static readonly Regex SchemePrefixRegex = new Regex("^[a-zA-Z][a-zA-Z0-9+.\\-]*:");

        /// <summary>
        /// Rewrites every relative <c>src</c> value in <paramref name="html"/> as
        /// <c>src="absolute-url"</c>, resolved against <paramref name="pageUrl"/>.
        /// Values that are empty or already carry a scheme are left untouched.
        /// </summary>
        /// <param name="html">HTML fragment to process.</param>
        /// <param name="pageUrl">Address of the page the fragment came from.</param>
        /// <returns>The fragment with relative src values made absolute.</returns>
        /// <exception cref="UriFormatException">
        /// <paramref name="pageUrl"/> is not a valid absolute URI and a relative src was found.
        /// </exception>
        private static string MakeSourcesAbsolute(string html, string pageUrl)
        {
            System.Text.StringBuilder rewritten = new System.Text.StringBuilder(html.Length);
            Uri pageUri = null;      // parsed lazily: only needed when a relative src exists
            int copiedUpTo = 0;      // index in html up to which text has been emitted

            for (Match src = SrcAttributeRegex.Match(html); src.Success; src = src.NextMatch())
            {
                string image = src.Groups["image"].Value;
                if (image == "" || SchemePrefixRegex.IsMatch(image))
                {
                    continue;
                }

                if (pageUri == null)
                {
                    pageUri = new Uri(pageUrl);
                }

                string absolute;
                if (image[0] == '/' && image.Length > 1 && image[1] == '/')
                {
                    // Protocol-relative ("//host/path"): only the scheme is missing.
                    absolute = pageUri.Scheme + ":" + image;
                }
                else if (image[0] == '/')
                {
                    // Root-relative: prepend scheme://host[:port].
                    absolute = pageUri.GetLeftPart(UriPartial.Authority) + image;
                }
                else
                {
                    // Directory-relative: prepend the page's directory, taken from the
                    // URL without its query string.
                    string pagePath = pageUri.GetLeftPart(UriPartial.Path);
                    absolute = pagePath.Substring(0, pagePath.LastIndexOf('/') + 1) + image;
                }

                rewritten.Append(html, copiedUpTo, src.Index - copiedUpTo);
                rewritten.Append("src=\"").Append(absolute).Append("\"");
                copiedUpTo = src.Index + src.Length;
            }

            rewritten.Append(html, copiedUpTo, html.Length - copiedUpTo);
            return rewritten.ToString();
        }


        /// <summary>
        /// Downloads a file over HTTP and saves it, byte for byte, into the
        /// "ArticleContentImageFile" folder under the application's startup path.
        /// The local name is the part of <paramref name="filename"/> from its last "/".
        /// Download or write failures are written to the console and otherwise ignored.
        /// </summary>
        /// <param name="filename">Absolute URL of the file; null or empty does nothing.</param>
        private void DownloadFile(string filename)
        {
            if (filename == null || filename == "")
                return;
            string path = Application.StartupPath + "\\ArticleContentImageFile\\";
            if (!Directory.Exists(path))
            {
                Directory.CreateDirectory(path);
            }
            try
            {
                HttpWebRequest oRequest = (HttpWebRequest) WebRequest.Create(filename);

                // Buffer the raw bytes first so a failed transfer never leaves a
                // partially written file. The content must not pass through a text
                // decoder: images are binary and would be corrupted.
                MemoryStream content = new MemoryStream();
                using (HttpWebResponse oResponse = (HttpWebResponse) oRequest.GetResponse())
                using (Stream body = oResponse.GetResponseStream())
                {
                    byte[] chunk = new byte[8192];
                    int read;
                    while ((read = body.Read(chunk, 0, chunk.Length)) > 0)
                    {
                        content.Write(chunk, 0, read);
                    }
                }

                // FileMode.Create truncates an existing file, so an older, longer
                // copy cannot leave stale bytes behind.
                using (FileStream fs = new FileStream(path + filename.Substring(filename.LastIndexOf("/")), FileMode.Create, FileAccess.Write))
                {
                    content.WriteTo(fs);
                }
            }
            catch (Exception ex)
            {
                Console.WriteLine(ex.Message);
            }

        }


        /// <summary>
        /// Downloads a page with <see cref="WebClient"/>, sending browser-like request
        /// headers, and decodes the bytes as UTF-8 when <c>Form1.encoding</c> is "utf-8"
        /// and as GB2312 otherwise.
        /// </summary>
        /// <param name="contenturl">
        /// Address of the page; any "&amp;amp;" in it is turned back into "&amp;" before
        /// the request is made.
        /// </param>
        /// <returns>
        /// The decoded page text, or an empty string when downloading or decoding fails.
        /// </returns>
        private string getWebContent(string contenturl)
        {
            string str = "";
            contenturl = contenturl.Replace("&amp;", "&");

            // WebClient is disposable; release it even when the download throws.
            using (WebClient client = new WebClient())
            {
                client.Headers.Add("Accept", "image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, application/x-shockwave-flash, application/vnd.ms-excel, application/vnd.ms-powerpoint, application/msword, */*");
                client.Headers.Add("Accept-Language", "zh-cn");
                client.Headers.Add("UA-CPU", "x86");
                client.Headers.Add("User-Agent", "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.2; SV1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)");
                try
                {
                    byte[] buffer = client.DownloadData(contenturl);
                    string charset = (Form1.encoding == "utf-8") ? "utf-8" : "gb2312";
                    str = System.Text.Encoding.GetEncoding(charset).GetString(buffer, 0, buffer.Length);
                }
                catch (Exception ex)
                {
                    // Best effort: report the failure and fall through to return "".
                    Console.WriteLine(ex.Message);
                }
            }

            return str;
        }
    }


    /// <summary>
    /// Plain data holder whose text representation is its <see cref="name"/>.
    /// The field names suggest one saved scraping configuration (presumably shown
    /// in a list control via ToString()) — confirm against the callers.
    /// </summary>
    public class MyItem
    {
        /// <summary>Numeric identifier of the item.</summary>
        public int id;

        /// <summary>Text returned by <see cref="ToString"/>.</summary>
        public string name;

        /// <summary>Presumably the link-matching regular expression — verify against caller.</summary>
        public string regex;

        /// <summary>Presumably the list page address — verify against caller.</summary>
        public string url;

        /// <summary>Presumably the marker that opens the article body — verify against caller.</summary>
        public string starttag;

        /// <summary>Presumably the marker that closes the article body — verify against caller.</summary>
        public string endtag;

        /// <summary>Presumably the marker that opens an ad section — verify against caller.</summary>
        public string adstarttag;

        /// <summary>Presumably the marker that closes an ad section — verify against caller.</summary>
        public string adendtag;

        /// <summary>Presumably the paging regular expression — verify against caller.</summary>
        public string pageReg;

        /// <summary>
        /// Returns the value of <see cref="name"/> unchanged (null when it was never set).
        /// </summary>
        /// <returns>The current value of <see cref="name"/>.</returns>
        public override string ToString()
        {
            return this.name;
        }
    }
}

原文地址:http://www.cnblogs.com/yesun/archive/2006/06/26/431304.html
posted @ 2007-02-05 16:56  海浪~~  阅读(1097)  评论(3)    收藏  举报