C# 页面抓取类

下面是一个用于抓取网站页面内容的简单类,代码如下:

 /// <summary>
        /// Downloads the page at <paramref name="url"/> with <c>WebRequest</c> and
        /// returns its body decoded as UTF-8.
        /// </summary>
        /// <param name="url">Address of the page to fetch.</param>
        /// <returns>
        /// The response body as text. If any exception is thrown while creating the
        /// request, fetching the response or reading it, the exception's
        /// <c>Message</c> is returned instead of the page content.
        /// </returns>
        public string WebHtmlCon(string url)
        {
            try
            {
                WebRequest request = WebRequest.Create(url);

                // Dispose the response and its stream deterministically so the
                // underlying connection is released even when reading fails.
                using (WebResponse response = request.GetResponse())
                using (Stream stream = response.GetResponseStream())
                using (StreamReader sReader = new StreamReader(stream, Encoding.UTF8))
                {
                    return sReader.ReadToEnd();
                }
            }
            catch (Exception e)
            {
                // Existing contract: failures are reported through the return value.
                return e.Message;
            }
        }
        /// <summary>
        /// Downloads the page at <paramref name="url"/> with <c>WebClient</c>,
        /// decoding the response as UTF-8.
        /// </summary>
        /// <param name="url">Address of the page to fetch.</param>
        /// <returns>
        /// The downloaded text. If any exception is thrown during the download, the
        /// exception's <c>Message</c> is returned instead of the page content.
        /// </returns>
        public string WebClientHtmlCon(string url)
        {
            try
            {
                // WebClient is IDisposable; release it as soon as the download ends.
                using (WebClient webclient = new WebClient())
                {
                    webclient.Encoding = Encoding.UTF8;
                    return webclient.DownloadString(url);
                }
            }
            catch (Exception ex)
            {
                // Existing contract: failures are reported through the return value.
                return ex.Message;
            }
        }

根据抓取的内容进行实际应用。

例如调用 API:获取页面返回的 JSON 数据,解析后提取自己需要的数据:

JSON 数据解析代码(使用 .NET 自带的 System.Web.Extensions 类库):

         /// <summary>
        /// Parses a JSON string into a dictionary.
        /// Requires a reference to the System.Web.Extensions library.
        /// </summary>
        /// <param name="jsonData">JSON text to parse.</param>
        /// <returns>
        /// The result of deserializing <paramref name="jsonData"/> as
        /// <c>Dictionary&lt;string, object&gt;</c>.
        /// </returns>
        private static Dictionary<string, object> JsonToDictionary(string jsonData)
        {
            Dictionary<string, object> parsed =
                new JavaScriptSerializer().Deserialize<Dictionary<string, object>>(jsonData);
            return parsed;
        }

快递查询API应用:

  /// <summary>
        /// Queries kuaidi100 for the tracking records of a parcel and renders them
        /// as an HTML table.
        /// </summary>
        /// <param name="con">Courier company, sent as the <c>type</c> query parameter.</param>
        /// <param name="number">Tracking number, sent as the <c>postid</c> query parameter.</param>
        /// <returns>
        /// An HTML table with one row per tracking record when the response's
        /// <c>message</c> field equals "ok"; otherwise (including when the response
        /// is not valid JSON) an HTML paragraph containing an error notice.
        /// </returns>
        public static string SelectYJ(string con, string number)
        {
            const string errorHtml = "<p style=\"line-height:28px;margin:0px;padding:0px;color:#F21818; font-size: 14px;\">快递公司网络异常,请稍后查询.</p>";

            // Escape both values so characters such as '&', '#' or spaces cannot
            // alter the structure of the query string.
            string url = "http://www.kuaidi100.com/query?type=" + System.Uri.EscapeDataString(con ?? string.Empty)
                + "&postid=" + System.Uri.EscapeDataString(number ?? string.Empty); // query URL

            // NOTE(review): WebHtmlCon is called here without an instance, which only
            // compiles if a static WebHtmlCon is in scope in the enclosing class - confirm.
            Dictionary<string, object> diclist;
            try
            {
                diclist = JsonToDictionary(WebHtmlCon(url));
            }
            catch (System.ArgumentException)
            {
                // The fetched text was not valid JSON (e.g. the fetch helper returned
                // an error message rather than the API response).
                return errorHtml;
            }
            catch (System.InvalidOperationException)
            {
                // The JSON could not be converted to a dictionary.
                return errorHtml;
            }

            object message;
            if (diclist == null
                || !diclist.TryGetValue("message", out message)
                || message == null
                || message.ToString() != "ok")
            {
                return errorHtml;
            }

            System.Text.StringBuilder html = new System.Text.StringBuilder();
            html.Append("<table id=\"showtablecontext\" style=\"border-collapse: collapse; width:520px; border-spacing: 0; border:0;\">");
            html.Append("<tbody>");
            html.Append("<tr><th width='163' style=\"background: #64AADB; border: 1px solid #75C2EF; color: #FFFFFF; font-size: 14px; font-weight: bold; height: 28px; line-height: 28px; text-indent: 15px;\">时间</th><th width='354' style=\"background: #64AADB; border: 1px solid #75C2EF; color: #FFFFFF; font-size: 14px; font-weight: bold; height: 28px; line-height: 28px; text-indent: 15px;\">地点和跟踪进度</th></tr>");

            // A missing or non-array "data" field yields a table with no record rows.
            object data;
            diclist.TryGetValue("data", out data);
            ArrayList list = data as ArrayList;
            if (list != null)
            {
                foreach (object entry in list)
                {
                    Dictionary<string, object> item = entry as Dictionary<string, object>;
                    if (item == null)
                    {
                        continue; // skip entries that are not JSON objects
                    }

                    html.Append(" <tr><td style=\"border: 1px solid #DDDDDD; font-size: 12px; line-height: 22px; padding: 3px 5px;\">");
                    html.Append(EncodeTrackingField(item, "time"));
                    html.Append("</td><td>");
                    html.Append(EncodeTrackingField(item, "context"));
                    html.Append("</td></tr>");
                }
            }

            html.Append("</tbody>");
            html.Append("</table>");
            return html.ToString();
        }

        /// <summary>
        /// Returns the HTML-encoded text of <paramref name="key"/> in
        /// <paramref name="item"/>, or an empty string when the key is absent or null.
        /// </summary>
        private static string EncodeTrackingField(Dictionary<string, object> item, string key)
        {
            object value;
            if (!item.TryGetValue(key, out value) || value == null)
            {
                return string.Empty;
            }

            // The value comes from a remote service; encode it before embedding in markup.
            return System.Net.WebUtility.HtmlEncode(value.ToString());
        }

结果为:

 

posted @ 2015-04-13 16:00  王小贝  阅读(698)  评论(0)  编辑  收藏  举报