小爬虫 - shileiadmin

公告

import urllib
import re

def getHtml(url):
    """Fetch *url* and return the raw response body.

    Works on Python 3 (``urllib.request.urlopen``) and Python 2
    (``urllib.urlopen``). The body is returned exactly as read, so on
    Python 3 it is ``bytes``.

    Args:
        url: Address of the page to download.

    Returns:
        The unmodified response body.
    """
    try:
        from urllib.request import urlopen  # Python 3
    except ImportError:
        from urllib import urlopen  # Python 2

    page = urlopen(url)
    try:
        return page.read()
    finally:
        # Release the underlying handle even when read() raises.
        page.close()

def getImage(html):
    """Download every ``.jpg`` referenced by a ``src="..."`` attribute in *html*.

    Images are written to the current working directory as ``0.jpg``,
    ``1.jpg``, ... in the order the URLs appear in the markup.

    Args:
        html: Page markup, either text or the raw bytes returned by
            ``getHtml``.
    """
    try:
        from urllib.request import urlretrieve  # Python 3
    except ImportError:
        from urllib import urlretrieve  # Python 2

    # On Python 3 the fetched page is bytes, which a text pattern cannot
    # search. ``bytes is str`` on Python 2, so this branch is skipped there.
    if isinstance(html, bytes) and not isinstance(html, str):
        html = html.decode("utf-8", "replace")

    # ``[^"]`` keeps a match inside one attribute value; a bare ``.*?`` would
    # run from a non-jpg ``src="`` through to the next ``.jpg"`` on the line.
    image_urls = re.findall(r'src="([^"]*?\.jpg)"', html)
    for index, imgurl in enumerate(image_urls):
        urlretrieve(imgurl, "%s.jpg" % index)
def test(path='D:\\python\\ch1\\test.txt'):
    """Write the line ``test write file`` to *path*, replacing any old content.

    Args:
        path: File to write. Defaults to the location the script has always
            used, so existing ``test()`` calls behave as before.
    """
    # ``with`` closes the file even if the write fails, and ``write`` works
    # on Python 2 and 3 alike (``print >> f`` is Python 2 only).
    with open(path, 'w') as handle:
        handle.write("test write file\n")

def main():
    """Download the images referenced by one Baidu Tieba thread page.

    Fetches the page with ``getHtml``, saves its ``.jpg`` images with
    ``getImage``, and on Windows keeps the console window open afterwards.
    """
    # ``os`` is not among the imports visible at the top of this file, so
    # bring it into scope here; without it the pause call raised NameError.
    import os

    html = getHtml("http://tieba.baidu.com/p/3338408181?pn=2&statsInfo=frs_pager")
    getImage(html)

    # ``pause`` is a cmd.exe builtin; other shells would only report an
    # unknown command.
    if os.name == "nt":
        os.system("pause")

# Run the scraper only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()

posted on 2014-10-09 23:47 shileiadmin 阅读(210) 评论(0) 收藏 举报

刷新页面 返回顶部