re-demo

 1 #!/usr/bin/env python
 2 # -*- coding: utf-8 -*-
 3 
 4 import re
 5 import urllib
 6 
 7 def getHtml(url):
 8     page=urllib.urlopen(url)
 9     html=page.read()
10     return html
11 
def getImg(html, download=True):
    """Find every ``.jpg`` URL in ``src="..."`` attributes of *html*.

    Args:
        html: Page markup, as text or as a raw byte string. Bytes are
            decoded as UTF-8 with undecodable bytes replaced.
        download: When true (the default), each image is saved into the
            current working directory under the last path segment of its
            URL. Pass ``False`` to only extract the URLs.

    Returns:
        The matched URLs as a list, in document order.
    """
    # ``urlretrieve`` lives in different modules on Python 2 and Python 3.
    try:
        from urllib.request import urlretrieve
    except ImportError:
        from urllib import urlretrieve

    # On Python 3 a fetched page is ``bytes``; a text pattern cannot search it.
    # (On Python 2 ``bytes`` is ``str``, so this branch is skipped.)
    if isinstance(html, bytes) and not isinstance(html, str):
        html = html.decode('utf-8', 'replace')

    # ``[^"]*`` keeps the match inside a single attribute value. The former
    # greedy ``.*`` ran on to the last ``.jpg"`` of the line and swallowed
    # every tag in between.
    imgList = re.findall(r'src="([^"]*\.jpg)"', html)
    print("imgList size:  %d" % len(imgList))

    for imgurl in imgList:
        # Local file name = everything after the final '/'.
        shortname = imgurl.rsplit('/', 1)[-1]
        print(shortname)
        if download:
            urlretrieve(imgurl, shortname)

    return imgList
29 
# Demo driver: fetch the listing page, echo its markup, then extract and
# download every .jpg it references.
val = getHtml("http://www.ipc.me/shoulu/pic-list-12-1.html")

print(val)
print('--------------------------------\n')

imgs = getImg(val)
print(imgs)
# Example of the list printed above:
# ['http://ipc.chotee.com/uploads/ishoulu/thumb/2013/05/4d149dbf6499bac7e8efc599a9e323cf_230.jpg', 'http://ipc.chotee.com/uploads/ishoulu/thumb/2013/05/a524fdd315b3ee2aee095f0985c852a0_230.jpg', 'http://ipc.chotee.com/uploads/ishoulu/thumb/2012/08/f6413ef4370b70eb355a6a794a1f8f75_230.jpg']

 

posted @ 2014-04-26 10:57  zhangxiaodel  阅读(301)  评论(0)  编辑  收藏  举报