读取chinanews新闻列表及内容
// RSS feeds published by chinanews.com, one entry per channel.
string[] urilist =
{
    "http://www.chinanews.com/rss/scroll-news.xml", // headlines
    "http://www.chinanews.com/rss/finance.xml",     // finance
    "http://www.chinanews.com/rss/sports.xml",      // sports
    "http://www.chinanews.com/rss/ent.xml",         // entertainment
    "http://www.chinanews.com/rss/health.xml",      // health
    "http://www.chinanews.com/rss/auto.xml",        // automotive
    "http://www.chinanews.com/rss/society.xml",     // society
};
/// <summary>
/// Loads the RSS feed at <paramref name="uri"/>, then fetches every linked article
/// and reduces its HTML body to plain text, which is written to the console.
/// </summary>
/// <remarks>
/// Articles whose page does not contain the expected body markers are skipped.
/// Any exception (download, XML parsing, ...) is reported on standard error and
/// ends processing of this feed; nothing is thrown to the caller.
/// </remarks>
/// <param name="uri">Address of the RSS feed; it is read through NetHelper.ReadHtml with the gb2312 encoding.</param>
/// <param name="type">
/// Not used by the code in this method.
/// NOTE(review): presumably identifies the news channel of the feed - confirm against the caller.
/// </param>
private void ReadNews(string uri, ushort type)
{
    try
    {
        string xml = NetHelper.ReadHtml(uri, Encoding.GetEncoding("gb2312"));
        XmlDocument doc = new XmlDocument();
        doc.LoadXml(xml);

        // NOTE(review): nothing in this method adds to the dictionary; the code that
        // stored the parsed articles appears to be missing from this excerpt.
        Dictionary<ushort, NewsEntity> newslist = new Dictionary<ushort, NewsEntity>();

        XmlNodeList list = doc.SelectNodes("rss/channel/item");
        for (int i = 0; i < list.Count; i++)
        {
            // Drop the "(picture)" / "(photo set)" suffixes from the headline.
            // The third Replace repeats the first as written and is kept unchanged.
            // NOTE(review): one of the two may have been meant for full-width brackets - confirm.
            string title = System.Helpers.XmlHelper.GetChileNode(list[i], "title").InnerText
                .Replace("(图)", "")
                .Replace("(组图)", "")
                .Replace("(图)", "");
            string link = System.Helpers.XmlHelper.GetChileNode(list[i], "link").InnerText;

            // NOTE(review): the feed is read as gb2312 but the article page uses the
            // machine's default encoding - confirm this is intended.
            string html = NetHelper.ReadHtml(link, Encoding.Default);

            string result = ExtractArticleText(html);
            if (result == null)
            {
                // Page layout not recognised: skip this article instead of aborting the feed.
                continue;
            }

            Console.WriteLine(result);
            result = result.Trim('\r', '\n').TrimEnd();
            if (!string.IsNullOrEmpty(title) && !string.IsNullOrEmpty(result))
            {
                // The visible source has no statements here.
                // NOTE(review): presumably the article was stored in newslist - restore that code.
            }
        }
    }
    catch (Exception ex)
    {
        // Keep the original best-effort contract (never throw), but do not hide the failure.
        Console.Error.WriteLine("ReadNews failed for " + uri + ": " + ex.Message);
    }
}

/// <summary>
/// Cuts the article body out of a downloaded page and strips it down to plain text.
/// </summary>
/// <param name="html">Full HTML of the article page.</param>
/// <returns>
/// The cleaned text, or <c>null</c> when <paramref name="html"/> is null/empty or the
/// body start/end markers cannot be found.
/// </returns>
private static string ExtractArticleText(string html)
{
    const string BodyStart = "<div class=left_zw>";
    const string BodyEnd = "<!--正文-->";
    const string PagerStart = @"<div id=""function_code_page"">";
    const string PagerEnd = "</div>";

    if (string.IsNullOrEmpty(html))
    {
        return null;
    }

    // IndexOf returns -1 when the marker is absent; index 0 is a valid hit.
    int start = html.IndexOf(BodyStart, StringComparison.Ordinal);
    if (start < 0)
    {
        return null;
    }

    int end = html.IndexOf(BodyEnd, start, StringComparison.Ordinal);
    if (end < 0)
    {
        return null;
    }

    string result = html.Substring(start, end - start);

    // Remove the pagination block when the page has one. Only the text up to (not
    // including) the closing tag is cut; the tag itself is removed by the tag pass below.
    int pagerStart = result.IndexOf(PagerStart, StringComparison.Ordinal);
    if (pagerStart >= 0)
    {
        int pagerEnd = result.IndexOf(PagerEnd, pagerStart, StringComparison.Ordinal);
        if (pagerEnd > pagerStart)
        {
            string pageStr = result.Substring(pagerStart, pagerEnd - pagerStart);
            result = result.Replace(pageStr, "");
        }
    }

    // Line breaks go first so that tags spanning several lines match the tag pattern.
    result = result.Replace("\r", "").Replace("\n", "");
    result = Regex.Replace(result, "<.*?>", "");        // HTML tags
    result = Regex.Replace(result, @"&(.{2,6});", "");  // character entities such as &nbsp;
    result = Regex.Replace(result, "\t{2,}", "\t");     // collapse runs of tabs
    result = Regex.Replace(result, @"\s{2,}", "");      // drop remaining whitespace runs
    return result;
}