using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Net;
using System.Text;
using System.Text.RegularExpressions;
using System.Web;
using System.Web.UI;
using System.Web.UI.WebControls;
using System.Xml;
namespace WebApplication19
{
/// <summary>
/// Selects which kind of table cell to extract from a row.
/// </summary>
/// <remarks>
/// The member name is turned into an HTML tag name via ToString() when the
/// cell regex is built in WebForm1.GET_TH_OR_TD_LIST, so the lower-case
/// spelling is significant and the members must not be renamed.
/// </remarks>
public enum SearchRange
{
    /// <summary>Header cells (the "th" tag).</summary>
    th = 0,

    /// <summary>Data cells (the "td" tag).</summary>
    td = 1,
}
/// <summary>
/// Page that, on load, downloads a search result page from bankofchina.com and
/// splits the first table inside the "BOC_main publish" div into header cells
/// and data-row cells.
/// </summary>
public partial class WebForm1 : System.Web.UI.Page
{
    // Address of the page that is downloaded and parsed.
    // The original literal contained "¬hing=", which is "&nothing=" after a stray
    // HTML-entity decode of "&not"; the query string is restored here.
    private const string SearchPageUrl =
        "http://srh.bankofchina.com/search/whpj/search.jsp?erectDate=2001-11-01&nothing=2016-11-04&pjname=1316&page=4";

    // Inner content of the <div ... class="BOC_main publish" ...> element.
    private const string DivPattern = @"(?<=<div (.*)?class=""BOC_main publish""[^>]*?>)([\s\S]*?)(?=</div>)";

    // Inner content of a <table ...> element.
    private const string TablePattern = @"(?<=<table (.*)?[^>]*?>)([\s\S]*?)(?=</table>)";

    // Inner content of each <tr ...> element.
    private const string RowPattern = @"(?<=<tr(.*)?[^>]*?>)([\s\S]*?)(?=</tr>)";

    /// <summary>
    /// Value returned by <c>getHtml</c>.
    /// NOTE(review): nothing in the visible part of this partial class assigns it,
    /// so it stays null unless it is set elsewhere - confirm the intended use.
    /// </summary>
    public string MKT;

    /// <summary>
    /// Downloads the search page, extracts the table rows and splits the first
    /// row into header cells and every following row into data cells.
    /// </summary>
    /// <returns>The current value of <see cref="MKT"/>.</returns>
    /// <remarks>
    /// Best-effort: any failure (network, I/O, parsing) is logged through
    /// System.Diagnostics.Trace and is not rethrown, so the page still renders.
    /// </remarks>
    private string getHtml()
    {
        try
        {
            List<string> rows = ExtractRows(DownloadPage(SearchPageUrl));
            if (rows.Count == 0)
            {
                // Previously this case surfaced as an out-of-range exception that
                // was swallowed by the catch block; handle it explicitly instead.
                System.Diagnostics.Trace.TraceWarning("WebForm1.getHtml: no table rows found in the downloaded page.");
                return MKT;
            }

            // rows[0] is the header row; every later row is a content row.
            // SearchRange tells the cell extractor which tag to look for.
            List<string> headerCells = GET_TH_OR_TD_LIST(SearchRange.th, rows[0]);
            List<List<string>> dataRows = rows
                .Skip(1)
                .Select(row => GET_TH_OR_TD_LIST(SearchRange.td, row))
                .ToList();

            // NOTE(review): headerCells and dataRows are parsed but not consumed yet,
            // exactly as in the original code - presumably MKT was meant to be built
            // from them; confirm and finish.
        }
        catch (Exception ex)
        {
            // Keep the page alive, but leave a trace instead of hiding the failure.
            // Fully qualified because Page.Trace (TraceContext) shadows the type name here.
            System.Diagnostics.Trace.TraceError("WebForm1.getHtml failed: {0}", ex);
        }

        return MKT;
    }

    /// <summary>Downloads <paramref name="url"/> and returns the body decoded as UTF-8.</summary>
    private static string DownloadPage(string url)
    {
        using (var client = new WebClient())
        using (Stream stream = client.OpenRead(url))
        using (var reader = new StreamReader(stream, Encoding.UTF8))
        {
            return reader.ReadToEnd();
        }
    }

    /// <summary>
    /// Narrows the page to the first matching div, then to the first table inside it,
    /// and returns the inner content of every row of that table.
    /// </summary>
    private static List<string> ExtractRows(string html)
    {
        string divContent = FirstMatchOrEmpty(html, DivPattern);
        string tableContent = FirstMatchOrEmpty(divContent, TablePattern);

        return Regex.Matches(tableContent, RowPattern)
            .Cast<Match>()
            .Select(match => match.Value)
            .ToList();
    }

    /// <summary>Returns the text of the first match of <paramref name="pattern"/>, or an empty string when there is none.</summary>
    private static string FirstMatchOrEmpty(string input, string pattern)
    {
        Match match = Regex.Match(input, pattern);
        return match.Success ? match.Value : string.Empty;
    }

    /// <summary>
    /// Extracts the inner content of every header or data cell found in a table row.
    /// </summary>
    /// <param name="range">Which cell tag to search for; the enum member name is used as the tag name.</param>
    /// <param name="row">Inner content of one table row.</param>
    /// <returns>The cell contents in document order; empty when <paramref name="row"/> is null or empty or has no such cells.</returns>
    private List<string> GET_TH_OR_TD_LIST(SearchRange range,string row)
    {
        if (string.IsNullOrEmpty(row))
        {
            return new List<string>();
        }

        string tag = range.ToString();

        // NOTE(review): "<th[^>]*?>" also accepts tags that merely start with "th"
        // (e.g. "<thead>"); left unchanged to preserve the existing matching.
        string cellPattern = $@"(?<=(<{tag}[^>]*?>))(?<tdCell>[\s\S]*?)(?=</{tag}>)";

        return Regex.Matches(row, cellPattern)
            .Cast<Match>()
            .Select(match => match.Groups["tdCell"].Value)
            .ToList();
    }

    /// <summary>Page load handler: runs the download-and-parse step on every request.</summary>
    protected void Page_Load(object sender, EventArgs e)
    {
        getHtml();
    }
}
}