第一只小啊小爬虫(纪念下)

import urllib.request

import re

def getHtml(url, encoding='GBK'):
    """Fetch *url* and return the response body decoded to text.

    Args:
        url: Address to fetch; passed unchanged to ``urllib.request.urlopen``.
        encoding: Codec used to decode the raw bytes. Defaults to ``'GBK'``,
            the value this script originally hard-coded.

    Returns:
        The decoded response body as ``str``.

    Raises:
        urllib.error.URLError: If the request cannot be completed.
        UnicodeDecodeError: If the body is not valid in *encoding*.
    """
    # The context manager closes the response even when read() raises;
    # the previous version never closed it.
    with urllib.request.urlopen(url) as page:
        raw = page.read()
    return raw.decode(encoding)

def getImg(html):
    """Download every ``.jpg`` named in a ``src="..."`` attribute of *html*.

    Each image is saved in the current working directory as ``0.jpg``,
    ``1.jpg``, ... numbered in the order the URLs appear in the markup.

    Args:
        html: Page markup as text.

    Returns:
        None.

    Raises:
        ValueError: If a captured ``src`` value is not an absolute URL
            (``urlretrieve`` cannot resolve relative paths).
        urllib.error.URLError: If a download fails.
    """
    # [^"]* keeps each match inside a single attribute value. The earlier
    # `.*?` could run past a closing quote (e.g. from src="a.png" up to a
    # later ...jpg") and capture a chunk of markup as one bogus URL.
    imgre = re.compile(r'src="([^"]*\.jpg)"')
    for i, imgurl in enumerate(imgre.findall(html)):
        # The file name is built from the running index; the original used
        # an undefined name `x` here and failed with NameError.
        urllib.request.urlretrieve(imgurl, '%s.jpg' % i)


# Script entry: download the picture index page, then save every .jpg it
# references into the current working directory.
page_url = "http://tupian.zol.com.cn/"
html = getHtml(page_url)
getImg(html)

posted @ 2014-12-04 22:04  静坐观雨  阅读(113)  评论(0)  编辑  收藏  举报