1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 import re
5 import urllib
6
def getHtml(url):
    """Download the resource at *url* and return its raw body.

    Args:
        url: Address passed unchanged to ``urllib.urlopen``.

    Returns:
        Whatever ``read()`` on the opened response yields (the full body).

    Raises:
        IOError: Propagated from ``urllib.urlopen`` when the URL cannot be
            opened (Python 2 ``urllib`` behaviour).
    """
    # NOTE: urllib.urlopen is the Python 2 spelling of this API.
    page = urllib.urlopen(url)
    try:
        return page.read()
    finally:
        # Always release the underlying connection, even if read() fails.
        page.close()
11
# Captures the value of a double-quoted ``src`` attribute ending in ".jpg".
# A negated class ([^"]*) is used rather than ".*" because a greedy ".*"
# runs to the LAST '.jpg"' on the line and would fuse several attributes
# (e.g. two <img> tags on one line) into a single bogus "URL".
_IMG_SRC_RE = re.compile(r'src="([^"]*\.jpg)"')


def getImg(html, download=True):
    """Extract ``.jpg`` image URLs from *html* and optionally download them.

    Args:
        html: Page markup to scan, as text.
        download: When true (the default), every URL found is fetched with
            ``urllib.urlretrieve`` and saved in the current working
            directory under the last path segment of the URL. URLs sharing
            the same last segment therefore overwrite one another. Pass
            ``False`` to only extract the URLs with no network or disk
            side effects.

    Returns:
        List of the matched URLs, in document order (duplicates kept).
    """
    imgList = _IMG_SRC_RE.findall(html)
    # Single-argument print() parses on both Python 2 and 3; the two spaces
    # keep the output identical to the original print statement.
    print("imgList size:  %d" % len(imgList))

    if download:
        for imgurl in imgList:
            # Text after the final '/' is used as the local file name.
            shortname = imgurl.rsplit('/', 1)[-1]
            print(shortname)
            urllib.urlretrieve(imgurl, shortname)

    return imgList
29
# Script driver: fetch one listing page, dump its markup, then extract and
# download every .jpg image it references.
PAGE_URL = "http://www.ipc.me/shoulu/pic-list-12-1.html"

val = getHtml(PAGE_URL)
print(val)
print('--------------------------------\n')

imgs = getImg(val)
print(imgs)
# Sample of the list printed above (presumably captured from an earlier run):
# ['http://ipc.chotee.com/uploads/ishoulu/thumb/2013/05/4d149dbf6499bac7e8efc599a9e323cf_230.jpg',
#  'http://ipc.chotee.com/uploads/ishoulu/thumb/2013/05/a524fdd315b3ee2aee095f0985c852a0_230.jpg',
#  'http://ipc.chotee.com/uploads/ishoulu/thumb/2012/08/f6413ef4370b70eb355a6a794a1f8f75_230.jpg']