抓取网上有用信息

     一直都想写个抓取网上信息的程序,但一直没动手。刚把手头的事情做完,突然想到这个,就顺手写了一个。程序可以用,但没经过太多思考,不足之处肯定有,欢迎拍砖!下面直接贴代码:

   

using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Linq;
using System.Text;
using System.Windows.Forms;
using System.Net;
using System.IO;
using System.Xml;
namespace WindowsFormsApplication1
{
    /// <summary>
    /// Demo form that downloads a listing page, cuts out the HTML table between two
    /// known markers, parses that fragment as XML and shows the text of the first
    /// cell of every row in <c>textBox1</c>.
    /// </summary>
    public partial class Form1 : Form
    {
        /// <summary>Address of the listing page that is scraped when the button is clicked.</summary>
        private const string TalentListUrl = "http://www.xcjob.cn/Talents.aspx?key=&type1=&type2=%E4%B8%8D%E9%99%90&type4=&type3=&area=&xl=";

        /// <summary>Opening tag of the table that holds the rows of interest.</summary>
        private const string TableStartMarker = "<table class=\"GridView\"";

        /// <summary>Comment emitted by the pager control; it marks the end of the useful range.</summary>
        private const string PagerMarker = "<!-- AspNetPager";

        public Form1()
        {
            InitializeComponent();
        }

        /// <summary>
        /// Runs the scrape and displays the result. The completion message is only shown
        /// when something was actually collected; otherwise the user is told that no data
        /// was obtained instead of being congratulated on an empty result.
        /// </summary>
        private void button1_Click(object sender, EventArgs e)
        {
            string collected = CJ(TalentListUrl);
            textBox1.Text = collected;

            if (collected.Length == 0)
            {
                MessageBox.Show("未采集到任何数据,请检查网络连接或页面结构!");
                return;
            }

            MessageBox.Show("恭喜你采集完成!");
        }

        /// <summary>
        /// Collects the data: downloads the page at <paramref name="Url"/>, isolates the
        /// table between <see cref="TableStartMarker"/> and <see cref="PagerMarker"/>,
        /// and returns the inner text of the first cell of each row, one per line ("\r\n").
        /// </summary>
        /// <param name="Url">Address of the page to scrape.</param>
        /// <returns>
        /// The extracted lines, or an empty string when the download failed, a marker is
        /// missing, or the fragment is not well-formed XML.
        /// </returns>
        private string CJ(string Url)
        {
            // Fetch the page source (HTML).
            string page = YM(Url);
            if (string.IsNullOrEmpty(page))
            {
                return string.Empty;
            }

            // Cut out the useful range. This relies on knowing the structure of the site,
            // so both markers are validated instead of letting Substring throw on -1.
            int tableStart = page.IndexOf(TableStartMarker, StringComparison.Ordinal);
            if (tableStart < 0)
            {
                return string.Empty;
            }

            // Search for the end marker only after the table start so the computed
            // length can never be negative.
            int pagerStart = page.IndexOf(PagerMarker, tableStart, StringComparison.Ordinal);
            if (pagerStart < 0)
            {
                return string.Empty;
            }

            // Strip line breaks and the stray closing </div> tags that would unbalance
            // the fragment, and replace &nbsp;, which is not a predefined XML entity.
            string fragment = page.Substring(tableStart, pagerStart - tableStart)
                .Trim()
                .Replace("\r", "")
                .Replace("\n", "")
                .Replace("</div>", "")
                .Replace("&nbsp;", " ");

            // Wrap the fragment in a single root element so it can be read as XML.
            string xml = "<?xml version=\"1.0\"?><a>" + fragment + "</a>";

            XmlDocument doc = new XmlDocument();
            // The content comes from a remote site: never resolve external entities or DTDs.
            doc.XmlResolver = null;

            try
            {
                doc.LoadXml(xml);
            }
            catch (XmlException ex)
            {
                // HTML is not guaranteed to be well-formed XML; report and give up cleanly.
                System.Diagnostics.Debug.WriteLine("Failed to parse page fragment as XML: " + ex.Message);
                return string.Empty;
            }

            StringBuilder lines = new StringBuilder();
            foreach (XmlNode cell in doc.SelectNodes("/a/table/tr/td[1]"))
            {
                lines.Append(cell.InnerText).Append("\r\n");
            }

            return lines.ToString();
        }

        /// <summary>
        /// Main download routine: issues an HTTP GET for <paramref name="Url"/> and
        /// returns the response body decoded as UTF-8.
        /// </summary>
        /// <param name="Url">Address to download.</param>
        /// <returns>
        /// The response text, or an empty string when the URL is malformed or the request
        /// or the read fails (the failure is written to the debug output).
        /// </returns>
        private string YM(string Url)
        {
            try
            {
                HttpWebRequest request = (HttpWebRequest)WebRequest.Create(Url);
                request.Method = "GET";

                // Dispose response, stream and reader so the underlying connection is released.
                using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
                using (Stream body = response.GetResponseStream())
                using (StreamReader reader = new StreamReader(body, Encoding.UTF8))
                {
                    return reader.ReadToEnd();
                }
            }
            catch (WebException ex)
            {
                System.Diagnostics.Debug.WriteLine("Request to " + Url + " failed: " + ex.Message);
                return string.Empty;
            }
            catch (IOException ex)
            {
                System.Diagnostics.Debug.WriteLine("Reading response from " + Url + " failed: " + ex.Message);
                return string.Empty;
            }
            catch (UriFormatException ex)
            {
                System.Diagnostics.Debug.WriteLine("Invalid URL " + Url + ": " + ex.Message);
                return string.Empty;
            }
        }

    }
}

 

 

posted @ 2013-10-11 11:24  刘观应  阅读(163)  评论(0)    收藏  举报