[Python]计算豆瓣电影TOP250的平均得分

用 Python 写的爬虫练习,感觉比 Golang 要好写一点。

 1 import re
 2 import urllib
 3 
 4 origin_url = 'https://movie.douban.com/top250?start=00&filter='
 5 urls = []
 6 scores = []
 7 
 8 
 9 def get_url():
10     step = 0
11     while step <= 250:
12         tmp = origin_url[:38]
13         tmp += str(step)
14         tmp += origin_url[40:]
15         urls.append(tmp)
16         step += 25
17 
def get_html(url):
    """Download *url* and return the raw response body.

    Args:
        url: Absolute URL of the page to fetch.

    Returns:
        The body exactly as read from the response, without any decoding.

    Raises:
        IOError: (``OSError``/``URLError`` on Python 3) if the request fails.
    """
    # Function-scope imports keep this block self-contained and let it run on
    # both Python 2 (urllib.urlopen) and Python 3 (urllib.request.urlopen).
    from contextlib import closing
    try:
        from urllib.request import urlopen
    except ImportError:
        from urllib import urlopen

    # closing() releases the connection even when read() raises; the
    # Python 2 response object is not a context manager by itself.
    with closing(urlopen(url)) as page:
        return page.read()
22 
23 
def get_score(html):
    """Extract every rating embedded in one page of markup.

    A rating is the text of an element tagged ``property="v:average"``,
    written as one or two digits, a literal dot and one digit (``9.6``,
    ``10.0``).

    Args:
        html: Page markup as text, or as UTF-8 encoded bytes such as the
            undecoded body of an HTTP response.

    Returns:
        A list of rating strings in document order; empty when the markup
        contains no rating.
    """
    # On Python 2 ``bytes`` is ``str`` so this branch never runs there; on
    # Python 3 it lets the text pattern below work on a raw response body.
    if isinstance(html, bytes) and not isinstance(html, str):
        html = html.decode('utf-8', 'replace')
    # The dot is escaped so that only a real decimal point is accepted.
    pattern = r'property="v:average">([0-9]{1,2}\.[0-9])</span>'
    return re.findall(pattern, html)
29 
30 
def solve():
    """Crawl every listing page and return the mean rating.

    Builds the page URLs with ``get_url()``, downloads each page, collects
    its ratings into the module-level ``scores`` list, and averages at most
    the first 250 ratings found.

    The mean is taken over the number of ratings actually collected rather
    than a fixed 250, so a page that fails to yield ratings no longer drags
    the result towards zero.

    Returns:
        The average rating as a float, or ``0.0`` when no rating was found.
    """
    get_url()
    for page_url in urls:
        # Progress output; parenthesised so it is valid on Python 2 and 3.
        print(page_url)
        scores.append(get_score(get_html(page_url)))

    # Flatten the per-page lists and keep only the first 250 entries.
    ratings = [float(value) for page in scores for value in page][:250]
    if not ratings:
        return 0.0
    return sum(ratings) / len(ratings)
45 
# Run the crawl only when executed as a script, so importing this module
# does not start network requests.
if __name__ == '__main__':
    # Parenthesised so the call is valid on both Python 2 and Python 3.
    print(solve())

 

posted @ 2016-08-10 15:15  Kirai  阅读(369)  评论(0)  编辑  收藏  举报