"""Download the ``.jpg`` images found on a range of pages of a Tieba thread.

Each page of the thread is fetched, scanned for image URLs, and every match
is saved to the current working directory as ``<n>.jpg`` where ``n`` is a
counter that keeps increasing across pages.
"""

import re
import urllib.parse
import urllib.request

try:
    # Nothing in this script uses ``requests``; the import is retained for
    # compatibility but must not stop the stdlib-only code from running.
    import requests  # noqa: F401
except ImportError:
    requests = None

# Extra query parameters sent along with every page request.
QUERY_VALUES = {
    'name': 'WHY',
    'location': 'SDU',
    'language': 'Python',
    'ie': 'utf-8',
    'wd': 'python',
}

# Captures the URL of each ``.jpg`` whose ``src`` attribute is immediately
# followed by a ``pic_ext`` attribute. Compiled once instead of per call.
IMG_PATTERN = re.compile(r'src="(.+?\.jpg)" pic_ext')


def getHtml(url):
    """Fetch *url* with the fixed query parameters and return the body as text.

    The parameters in ``QUERY_VALUES`` are URL-encoded and appended to *url*.
    If *url* already carries a query string they are joined with ``&``;
    otherwise a new query string is started with ``?``.

    Args:
        url: Address of the page to download.

    Returns:
        The response body decoded as UTF-8.

    Raises:
        urllib.error.URLError: If the request fails.
        UnicodeDecodeError: If the body is not valid UTF-8.
    """
    data = urllib.parse.urlencode(QUERY_VALUES)
    # Blindly adding '?' to a URL such as '...?pn=21' would yield
    # '...?pn=21?name=...' and corrupt the value of the last parameter.
    separator = '&' if '?' in url else '?'
    req = urllib.request.Request(url + separator + data)
    # The context manager closes the connection even if read/decode fails.
    with urllib.request.urlopen(req) as response:
        the_page = response.read()
    return the_page.decode('utf-8')


def getImg(html, x):
    """Download every image referenced in *html* and return the next index.

    Each URL matched by ``IMG_PATTERN`` is saved in the current working
    directory as ``<x>.jpg``, with *x* incremented after every download.

    Args:
        html: Page source to scan for image URLs.
        x: Number used for the first saved file name.

    Returns:
        The number to use for the next image, i.e. *x* plus the number of
        images downloaded. Equal to *x* when nothing matched.
    """
    for imgurl in IMG_PATTERN.findall(html):
        urllib.request.urlretrieve(imgurl, '%s.jpg' % x)
        x += 1
    return x


def main():
    """Download the images from pages 21 through 40 of the thread."""
    y = 0
    for i in range(20, 40):
        url = "http://tieba.baidu.com/p/2460150866?pn=" + str(i + 1)
        html = getHtml(url)
        # Carry the counter across pages so file names never collide.
        y = getImg(html, y)
        print(url)


if __name__ == "__main__":
    main()