C#获取博客园的标题
http://www.jb51.net/tools/zhengze.html
// Demo: pull the link target and link text out of every <li><a ...>...</a></li> item
// using two named capture groups, "url" and "title".
Regex linkPattern = new Regex(@"<li><a href=""(?<url>[^""]*)"" target=""_blank"">(?<title>[^<]*)</a></li>");

// Sample markup; the pieces below concatenate to the exact original single-line string.
string html =
    "<div>sadfasdfasd</div> " +
    "<div class=\"video_1_left\"> <UL> " +
    "<li><a href=\"/news/12718.html\" target=\"_blank\">标题sadfasdfasdfasdf</a></li> " +
    "<li><a href=\"/news/12710.html\" target=\"_blank\">标题asdfasdfasdf</a></li> " +
    "<li><a href=\"/news/12729.html\" target=\"_blank\">v2sdfasdf</a></li> " +
    "<li><a href=\"/news/12728.html\" target=\"_blank\">标题sdfsadf</a></li> " +
    "</UL> </div> " +
    "<div class=\"video_1_right\"> <UL> " +
    "<li><a href=\"/news/12705.html\" target=\"_blank\">标题xxxfasdfasdfx</a></li> " +
    "<li><a href=\"/news/12737.html\" target=\"_blank\">标题xxxdfasdfasax</a></li> " +
    "</UL> </div> " +
    "<div>sadfasdfasd</div> ";

// Print one "url<TAB>title" line per matched list item.
MatchCollection found = linkPattern.Matches(html);
for (int i = 0; i < found.Count; i++)
{
    GroupCollection groups = found[i].Groups;
    Console.WriteLine("{0}\t{1}", groups["url"].Value, groups["title"].Value);
}
指定分组的名称:写成 (?<Word>\w+),就把 \w+ 这个分组命名为 Word 了。
语法 (?<name>exp):匹配 exp,并把匹配到的文本捕获到名为 name 的组里。
设置webapi,部署在IIS上
/// <summary>
/// Web API action: asks <c>NewsManage.GrabNews</c> to scrape the cnblogs home page
/// and hands the result to <c>toJson</c> to build the HTTP response.
/// </summary>
/// <returns>The response message produced by <c>toJson</c>.</returns>
public HttpResponseMessage GetGrabNews()
{
    var manager = new NewsManage();
    var grabbed = manager.GrabNews("http://www.cnblogs.com/");
    return toJson(grabbed);
}
/// <summary>
/// Wraps a value in an <see cref="HttpResponseMessage"/> whose body is UTF-8 text.
/// Strings and chars are written as-is; every other value is serialized with
/// <c>JsonConvert.SerializeObject</c>.
/// </summary>
/// <param name="obj">The value to send back to the client.</param>
/// <returns>
/// A response with a <c>text/plain; charset=utf-8</c> body and an
/// <c>Access-Control-Allow-Origin: *</c> header.
/// </returns>
public HttpResponseMessage toJson(Object obj)
{
    string str;
    if (obj is String || obj is Char)
    {
        str = obj.ToString();
    }
    else
    {
        str = JsonConvert.SerializeObject(obj);
    }

    // The original created the content as "application/json" and then immediately
    // overwrote the media type with "text/plain"; set the final value directly.
    HttpResponseMessage result = new HttpResponseMessage
    {
        Content = new StringContent(str, Encoding.UTF8, "text/plain")
    };

    // Allow cross-origin callers. Access-Control-Allow-Origin is a response header, so it
    // belongs on the response's header collection rather than on the content headers
    // (newer System.Net.Http versions are believed to reject it there - verify on the target runtime).
    result.Headers.Add("Access-Control-Allow-Origin", "*");
    return result;
}
/// <summary>
/// Downloads the page at <paramref name="url"/> with an HTTP GET, decodes it as UTF-8,
/// and passes the text to <c>AnalysisHtml</c> to extract the news entries.
/// </summary>
/// <param name="url">Absolute HTTP(S) address of the page to scrape.</param>
/// <returns>The list produced by <c>AnalysisHtml</c> for the downloaded markup.</returns>
public List<T_News> GrabNews(string url)
{
    HttpWebRequest req = (HttpWebRequest)WebRequest.Create(url);
    req.Method = "GET";
    req.ContentType = "text/html;charset=utf-8"; // was misspelled "chartset"

    StringBuilder sb = new StringBuilder();

    // Dispose the response, its stream and the reader deterministically.
    // A direct cast replaces the unchecked "as" cast so a wrong type fails clearly.
    using (HttpWebResponse wr = (HttpWebResponse)req.GetResponse())
    using (Stream respStream = wr.GetResponseStream())
    using (StreamReader reader = new StreamReader(respStream, Encoding.UTF8))
    {
        // Lines are appended without their terminators, exactly as before,
        // so the parser receives the page as one unbroken line.
        string line;
        while ((line = reader.ReadLine()) != null)
        {
            sb.Append(line);
        }
    }

    return AnalysisHtml(sb.ToString());
}
/// <summary>
/// Extracts post links from cnblogs markup. Each
/// <c>&lt;a class="titlelnk" href="http://www.cnblogs.com/..." target="_blank"&gt;</c> anchor
/// becomes one <c>T_News</c> whose <c>Title</c> is the anchor text and whose
/// <c>Content</c> is the part of the href after the site prefix.
/// </summary>
/// <param name="htmlContent">The markup to scan.</param>
/// <returns>One entry per matching anchor; an empty list when nothing matches.</returns>
public List<T_News> AnalysisHtml(string htmlContent)
{
    // Named groups: (?<href>[^"]*) captures everything up to the closing quote of the
    // attribute, (?<title>[^<]*) captures the link text up to the next '<'.
    // \s* tolerates any amount of whitespace (including none) between attributes.
    string strPattern = "<a\\s*class=\"titlelnk\"\\s*href=\"http://www.cnblogs.com/(?<href>[^\"\"]*)\"\\s*target=\"_blank\">(?<title>[^<]*)</a>";
    Regex regex = new Regex(strPattern, RegexOptions.IgnoreCase);

    List<T_News> newsList = new List<T_News>();

    // Matches() yields an empty collection when nothing matches, so a separate
    // IsMatch() pre-check would only scan the input a second time.
    foreach (Match match in regex.Matches(htmlContent))
    {
        newsList.Add(new T_News
        {
            Title = match.Groups["title"].Value,
            Content = match.Groups["href"].Value
        });
    }

    return newsList;
}
客户端调用webapi接口
// Load the scraped news list from the Web API and render it into ".list-ul".
// Each item becomes: li.list-li > div.con[data-href] > (title text, div.btn[data-id]).
$.get("http://10.100.22.54:8095/api/NewsManager/GetGrabNews", function (data) {
    var $ul = $(".list-ul");
    $ul.empty();

    $.each(data, function (key, value) {
        var title = value.Title;
        // gblen/gbsub are String extensions defined elsewhere on the page;
        // titles longer than 43 (as measured by gblen) are shortened with gbsub(40).
        var shown = title.gblen() > 43 ? title.gbsub(40) : title;

        // Build the nodes with .attr()/.text() instead of concatenating HTML:
        // the title and link come from a scraped third-party page, so they must
        // never be interpreted as markup, and the tags are now nested correctly.
        var $btn = $('<div class="btn"></div>')
            .attr("data-id", String(value.ID))
            .text("删除");
        var $con = $('<div class="con"></div>')
            .attr("data-href", String(value.Content))
            .text(shown)
            .append($btn);

        $ul.append($('<li class="list-li"></li>').append($con));
    });
}, "json");

浙公网安备 33010602011771号