# re-demo
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Demo: fetch a web page, pull every .jpg URL out of its src="..."
# attributes with a regular expression, and download each image into the
# current working directory.

import re
import urllib

try:
    # Python 3 keeps the URL helpers in urllib.request.
    from urllib.request import urlopen, urlretrieve
except ImportError:
    # Python 2 exposes them directly on the urllib module.
    urlopen = urllib.urlopen
    urlretrieve = urllib.urlretrieve

# [^"]* (rather than .*) keeps each match inside a single quoted attribute,
# so several tags on one line yield one URL each instead of one greedy
# match that runs from the first src=" to the last .jpg" on that line.
_IMG_SRC_RE = re.compile(r'src="([^"]*\.jpg)"')


def _toText(data):
    """Return *data* as text, decoding it only when it is Python 3 bytes."""
    # On Python 2 ``bytes is str``, so the second test keeps str untouched.
    if isinstance(data, bytes) and not isinstance(data, str):
        # NOTE(review): assumes the page is UTF-8; undecodable bytes are
        # replaced instead of raising -- confirm against the target site.
        return data.decode("utf-8", "replace")
    return data


def getHtml(url):
    """Fetch *url* and return the response body as a string.

    The connection is always closed, even when reading fails.
    """
    page = urlopen(url)
    try:
        html = page.read()
    finally:
        page.close()
    return _toText(html)


def getImg(html, download=True):
    """Find the .jpg URLs in *html* and optionally download them.

    Args:
        html: Page source to scan for ``src="....jpg"`` attributes.
        download: When true (the default), save every image into the
            current directory under the last path component of its URL.
            Pass False to only extract the URLs.

    Returns:
        The list of matched image URLs, in document order.
    """
    imgList = _IMG_SRC_RE.findall(_toText(html))
    # Same output as the original ``print "imgList size: " ,len(imgList)``.
    print("%s %s" % ("imgList size: ", len(imgList)))

    if download:
        for imgurl in imgList:
            # Everything after the final '/' is used as the local file name.
            shortname = imgurl.split('/')[-1]
            print(shortname)
            urlretrieve(imgurl, shortname)

    return imgList


if __name__ == "__main__":
    val = getHtml("http://www.ipc.me/shoulu/pic-list-12-1.html")

    print(val)
    print('--------------------------------\n')

    imgs = getImg(val)
    print(imgs)
    # Example output:
    # ['http://ipc.chotee.com/uploads/ishoulu/thumb/2013/05/4d149dbf6499bac7e8efc599a9e323cf_230.jpg',
    #  'http://ipc.chotee.com/uploads/ishoulu/thumb/2013/05/a524fdd315b3ee2aee095f0985c852a0_230.jpg',
    #  'http://ipc.chotee.com/uploads/ishoulu/thumb/2012/08/f6413ef4370b70eb355a6a794a1f8f75_230.jpg']