抓取网上有用信息
一直都想写个抓取网上信息的程序,但一直没动手。刚把手头的事情做完,突然想到这个,就顺手写了一个。程序可以用,但没有经过太多思考,肯定有不足之处,欢迎拍砖!下面直接贴代码:
using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Linq;
using System.Text;
using System.Windows.Forms;
using System.Net;
using System.IO;
using System.Xml;
namespace WindowsFormsApplication1
{
/// <summary>
/// Form that downloads the page at <see cref="TalentsUrl"/> and shows the text of the
/// first column of its "GridView" table in <c>textBox1</c>.
/// </summary>
public partial class Form1 : Form
{
    /// <summary>Page that is scraped when button1 is clicked.</summary>
    private const string TalentsUrl =
        "http://www.xcjob.cn/Talents.aspx?key=&type1=&type2=%E4%B8%8D%E9%99%90&type4=&type3=&area=&xl=";

    /// <summary>Markup that opens the table holding the rows of interest.</summary>
    private const string TableStartMarker = "<table class=\"GridView\"";

    /// <summary>Markup that follows the table; the extracted fragment ends just before it.</summary>
    private const string TableEndMarker = "<!-- AspNetPager";

    public Form1()
    {
        InitializeComponent();
    }

    /// <summary>
    /// Scrapes <see cref="TalentsUrl"/>, puts the result into <c>textBox1</c> and tells the
    /// user whether anything was collected.
    /// </summary>
    private void button1_Click(object sender, EventArgs e)
    {
        string collected = CJ(TalentsUrl);
        textBox1.Text = collected;

        // CJ returns an empty string when the download failed or the expected markup was
        // not found; do not congratulate the user in that case.
        if (collected.Length == 0)
        {
            MessageBox.Show("未采集到任何数据,请检查网络连接或页面结构!");
            return;
        }

        MessageBox.Show("恭喜你采集完成!");
    }

    /// <summary>
    /// Collects data: downloads <paramref name="Url"/>, cuts out the markup between
    /// <see cref="TableStartMarker"/> and <see cref="TableEndMarker"/>, parses it as XML and
    /// returns the inner text of the first cell of every table row.
    /// </summary>
    /// <param name="Url">Address of the page to scrape.</param>
    /// <returns>
    /// One line (terminated by "\r\n") per matched cell, or an empty string when the page
    /// could not be downloaded or either marker is missing.
    /// </returns>
    /// <exception cref="XmlException">The extracted fragment is not well-formed XML.</exception>
    private string CJ(string Url)
    {
        // Fetch the page source (HTML). YM reports failure as an empty string.
        string page = YM(Url);
        if (string.IsNullOrEmpty(page))
        {
            return string.Empty;
        }

        // Narrow the page down to the useful range. Ordinal matching is used because the
        // markers are markup, not linguistic text.
        int start = page.IndexOf(TableStartMarker, StringComparison.Ordinal);
        if (start < 0)
        {
            return string.Empty;
        }

        // Search from the table onwards so an earlier occurrence of the end marker cannot
        // yield a negative length.
        int end = page.IndexOf(TableEndMarker, start, StringComparison.Ordinal);
        if (end < 0)
        {
            return string.Empty;
        }

        string fragment = page.Substring(start, end - start)
            .Trim()
            .Replace("\r", "")
            .Replace("\n", "")
            // Presumably closing tags of wrappers opened before the table; left in place
            // they would be unbalanced inside the synthetic root element.
            .Replace("</div>", "")
            // "&nbsp;" is not a predefined XML entity, so LoadXml would reject it; map both
            // the entity and the literal no-break space to a plain space.
            .Replace("&nbsp;", " ")
            .Replace("\u00A0", " ");

        // Wrap the fragment in a single root element so it can be read as XML.
        XmlDocument doc = new XmlDocument();
        doc.LoadXml("<?xml version=\"1.0\"?><a>" + fragment + "</a>");

        StringBuilder lines = new StringBuilder();
        foreach (XmlNode cell in doc.SelectNodes("/a/table/tr/td[1]"))
        {
            lines.Append(cell.InnerText).Append("\r\n");
        }

        return lines.ToString();
    }

    /// <summary>
    /// Main download routine: issues an HTTP GET for <paramref name="Url"/> and returns the
    /// response body decoded as UTF-8.
    /// </summary>
    /// <param name="Url">Address to request.</param>
    /// <returns>
    /// The response body, or an empty string if the request or the read failed. Failures are
    /// written to the trace listeners instead of being thrown.
    /// </returns>
    private string YM(string Url)
    {
        try
        {
            HttpWebRequest request = (HttpWebRequest)WebRequest.Create(Url);
            request.Method = "GET";

            // Dispose the response, stream and reader so the underlying connection is
            // released even when reading fails.
            using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
            using (Stream body = response.GetResponseStream())
            using (StreamReader reader = new StreamReader(body, Encoding.UTF8))
            {
                return reader.ReadToEnd();
            }
        }
        catch (Exception ex)
        {
            // Deliberately best-effort: callers treat an empty string as "nothing
            // downloaded". Record the cause rather than discarding it silently.
            System.Diagnostics.Trace.WriteLine("YM: failed to download " + Url + ": " + ex);
            return string.Empty;
        }
    }
}
}

浙公网安备 33010602011771号