爬虫

获得榜单的前166部电影的评分总和(http://movie.douban.com/top250)

using System;
using System.IO;
using System.Net;
using System.Text;
using System.Text.RegularExpressions;

namespace ConsoleApplication1
{
    /// <summary>
    /// Console crawler that adds up the ratings of the first 166 entries found on
    /// the Douban Top 250 chart (http://movie.douban.com/top250) and prints the sum.
    /// </summary>
    class Program
    {
        // Step applied to the "start" query parameter for each successive page request.
        private const int PageSize = 25;

        // Upper bound of the chart; together with PageSize it fixes how many pages are requested.
        private const int ListSize = 250;

        // Number of leading ratings that are accumulated before the crawl stops.
        private const int FilmsToSum = 166;

        // Group 1 captures the text inside a rating span. [^<]+ (instead of a greedy .+)
        // keeps every match confined to a single span even if several spans share a line.
        private static readonly Regex RatingPattern = new Regex(
            "<span class=\"rating_num\" property=\"v:average\">([^<]+)</span>");

        /// <summary>
        /// Issues a request to <paramref name="url"/> and returns the whole response
        /// body decoded as UTF-8.
        /// </summary>
        /// <param name="url">Absolute URL to request.</param>
        /// <returns>The response body as a string.</returns>
        /// <exception cref="UriFormatException"><paramref name="url"/> is not a valid URI.</exception>
        /// <exception cref="WebException">The request failed.</exception>
        public static string GetUrlRequerstInfo(string url)
        {
            HttpWebRequest httpReq = (HttpWebRequest)WebRequest.Create(new Uri(url));

            // Dispose the response, its stream and the reader so the underlying
            // connection is released after every call instead of leaking.
            using (HttpWebResponse httpResp = (HttpWebResponse)httpReq.GetResponse())
            using (Stream respStream = httpResp.GetResponseStream())
            using (StreamReader respStreamReader = new StreamReader(respStream, Encoding.UTF8))
            {
                return respStreamReader.ReadToEnd();
            }
        }

        /// <summary>
        /// Requests the chart page by page, sums the first <see cref="FilmsToSum"/>
        /// ratings found, writes the sum to the console and waits for a key press.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            int pageCount = ListSize / PageSize;
            int counted = 0;    // number of ratings accumulated so far
            decimal total = 0;  // running sum of the ratings

            // Stop requesting pages as soon as enough ratings have been collected.
            for (int page = 0; page < pageCount && counted < FilmsToSum; page++)
            {
                int start = page * PageSize;
                string html = GetUrlRequerstInfo("http://movie.douban.com/top250?start=" + start + "&filter=");

                foreach (Match match in RatingPattern.Matches(html))
                {
                    string text = match.Groups[1].Value;
                    decimal rating;

                    // Parse with the invariant culture: the page writes ratings with a '.'
                    // decimal separator, which a comma-decimal system locale would misread.
                    if (!decimal.TryParse(
                            text,
                            System.Globalization.NumberStyles.Number,
                            System.Globalization.CultureInfo.InvariantCulture,
                            out rating))
                    {
                        // Still best-effort (the entry is skipped), but no longer silent.
                        Console.Error.WriteLine("Skipping unparseable rating: " + text);
                        continue;
                    }

                    total += rating;
                    counted++;
                    if (counted == FilmsToSum)
                    {
                        break;
                    }
                }
            }

            Console.WriteLine(total);
            Console.Read();
        }
    }
}

 

posted @ 2016-02-27 11:33  云天  阅读(225)  评论(0)  编辑  收藏  举报