1 # -*- coding:utf-8 -*-
2 import urllib2
3 import re
# Download pages 1-4 of the qiushibaike "hot" listing and print the inner
# text of every <span>...</span> element found on each page.

# Loop-invariant request settings and the extraction pattern are built once
# instead of being re-created for every page.
user_agent = 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'
headers = {'User-Agent': user_agent}
# re.S lets '.' match newlines, so a <span> that spans several lines is
# still captured; the non-greedy '.*?' stops at the nearest closing tag.
pattern = re.compile(r'<span>(.*?)</span>', re.S)

for page in range(1, 5):
    url = 'http://www.qiushibaike.com/hot/page/' + str(page)
    request = urllib2.Request(url, headers=headers)

    # Only the network call is guarded: it is the statement that raises
    # URLError (HTTPError is a subclass and additionally carries `code`).
    try:
        response = urllib2.urlopen(request)
    except urllib2.URLError as e:
        if hasattr(e, "code"):
            print(e.code)
        if hasattr(e, "reason"):
            print(e.reason)
        # Nothing to parse for this page; move on to the next one.
        continue

    # Always release the connection, even if reading or decoding fails.
    try:
        content = response.read().decode('utf-8')
    finally:
        response.close()

    # NOTE: `content` is decoded text, so each match is a unicode string and
    # printing it relies on the encoding of the output stream.
    items = pattern.findall(content)
    for item in items:
        print(item)