// Resets the output controls, asks the web service for the crawl settings and
// then walks list pages 75 down to 1, collecting into listBox1 every
// "showrent" link whose id is greater than the value in ds.Tables[0].Rows[4][3].
// NOTE(review): this is a fragment of a larger method; the method header and
// the declaration of `chk` are not visible here.
textBox1.Text = "";
listBox1.Items.Clear();
DataSet ds = new DataSet();
DLFcWebService.Service s = new DlfcSpider.DLFcWebService.Service();
// Region: fetch the list of candidate links.
#region 取列表
// NOTE(review): no catch/finally for this try is visible in this fragment;
// it was probably lost when the code was published - confirm against the
// real source.
try
{
// Presumably fills ds with the crawl configuration - verify against the
// web service; only ds.Tables[0].Rows[4] is read below.
s.Bot_List(ref ds, chk);

//MessageBox.Show(ds.Tables[0].Rows[3][2].ToString());

// Pages are visited from 75 down to 1; the page count is hard-coded.
for (int i = 75; i > 0; i--)
{


// Column 4 of row 4 is used as the list-page URL prefix; the page number is
// appended to it.
string Url = ds.Tables[0].Rows[4][4].ToString() + i.ToString();


// Download the page text using the system default encoding.
// NOTE(review): the WebClient, stream and reader are never disposed here.
System.Net.WebClient j = new System.Net.WebClient();
System.IO.Stream str = j.OpenRead(Url);
System.IO.StreamReader sr = new System.IO.StreamReader(str, System.Text.Encoding.Default);
string content = sr.ReadToEnd();
//textBox1.Text = content;

//System.Text.RegularExpressions.Regex r = new System.Text.RegularExpressions.Regex(@"showout.jsp?id=(^\d+$)", System.Text.RegularExpressions.RegexOptions.Multiline);

//System.Text.RegularExpressions.Match m = r.Match(Text);

// Collect every "showrent.js..." link fragment found on this page.
System.Text.RegularExpressions.MatchCollection Matches = System.Text.RegularExpressions.Regex.Matches(content, @"showrent.js([\w- ./?%&=]*)?", System.Text.RegularExpressions.RegexOptions.Multiline);
// listBox2 is a per-page scratch list, so it is emptied for every page.
listBox2.Items.Clear();
foreach (System.Text.RegularExpressions.Match mmm in Matches)
{

listBox2.Items.Add(mmm.ToString());

}
// Walk this page's matches in reverse order and keep only those whose id is
// greater than the value stored in column 3 of row 4.
// NOTE(review): Int32.Parse throws if anything other than digits remains
// after stripping "showrent.jsp?id=" - confirm the links never carry extra
// query parameters.
for (int p = listBox2.Items.Count - 1; p >= 0; p--)
{
if (Int32.Parse(listBox2.Items[p].ToString().Replace("showrent.jsp?id=", "")) > (Int32.Parse(ds.Tables[0].Rows[4][3].ToString())))
listBox1.Items.Add(listBox2.Items[p].ToString());

}
}
}

#endregion
// Region: fetch the detail pages.
// For every entry collected in listBox1, downloads the corresponding page and
// cuts its text into fields; afterwards reports how many entries were handled.
// NOTE(review): the braces and the else in this region do not pair up as
// shown (the inner try has no visible catch and the for loop is never
// visibly closed); lines were probably lost when the code was published -
// confirm against the real source.
#region 抓网页
if (listBox1.Items.Count > 0)
{

for (int p = 0; p < listBox1.Items.Count; p++)
{
try
{
// NOTE(review): the URL prefix below is not quoted, so this line cannot
// compile as shown; the quotes were presumably stripped when publishing.
string Url = http://xxx.com/bbb.asp?id= + listBox1.Items[p].ToString();
// Download the page text using the system default encoding.
// NOTE(review): the WebClient, stream and reader are never disposed here.
System.Net.WebClient j = new System.Net.WebClient();
System.IO.Stream str = j.OpenRead(Url);
System.IO.StreamReader sr = new System.IO.StreamReader(str, System.Text.Encoding.Default);
string content = sr.ReadToEnd();
// NOTE(review): the marker searched for here and the two split patterns
// below look like they lost their original markup when the code was
// published - confirm the intended literals.
if (content.IndexOf(" ", 0) > 0)
{
// Keep the part after the first split marker (b[1]) and, within it, the part
// before the second marker (c[0]).
// NOTE(review): b[1] throws if the first split yields a single element.
string[] b = System.Text.RegularExpressions.Regex.Split(content, "
");
string[] c = System.Text.RegularExpressions.Regex.Split(b[1].ToString(), "
");

// Lower-case the fragment, replace the cell boundaries with the
// "DlFc_DlFc" token, and strip line breaks so that the text can be split
// into fields on that token.
string f = c[0].ToString().ToLower()
//textBox1.Text = f;
.Replace("bgcolor=\"ffffff\">", "DlFc_DlFc")// Originally the whole td tag was used as the separator, but the width before it turned out to vary while the trailing bgcolor=\"ffffff\"> is always the same; matching only that part is simpler and saves N Replace calls.
.Replace(" ", "DlFc_DlFc").Replace("\n", "").Replace("\r", "").Replace(" ", "").Replace(" ", " ");
// NOTE(review): d is not used again in the visible code; whatever consumed
// the fields appears to have been removed.
string[] d = System.Text.RegularExpressions.Regex.Split(f, "DlFc_DlFc");
//for (int i = 0; i < d.Length; i++)
//{
// textBox1.Text += i.ToString() + "" + d[i].ToString() + "\r\n";
//}



}

// Report the number of collected entries in the text box and in a message
// box that also names the last entry in listBox1 (the UI strings read
// roughly "fetched N items in total" / "shared rental").
textBox1.Text += "共抓取" + listBox1.Items.Count.ToString() + "条";
MessageBox.Show("合租" + listBox1.Items[listBox1.Items.Count - 1].ToString() + "共抓取" + listBox1.Items.Count.ToString() + "条");

}
else
// Shown when nothing was collected (the UI string reads roughly
// "no new shared-rental records").
MessageBox.Show("合租没有新记录");
#endregion
}
http://workgroup.cn/CS/blogs/aspnet/archive/2006/06/02/_9362D6530854DF79E14F6F60E34E0178_.aspx
posted on 2007-03-08 00:09  mbskys  阅读(171)  评论(0)    收藏  举报