下载百度贴吧图片

学习 urllib 及 re 模块的用法（以下代码基于 Python 2）：

import urllib
import re

# Matches .jpg image URLs served from imgsrc.baidu.com. The middle part
# excludes quotes, whitespace and angle brackets so that a match cannot run
# past the end of one attribute value and swallow a later URL on the same line.
pic_re = re.compile(r'''http://imgsrc\.baidu\.com[^"'\s<>]*?\.jpg''')

def html(url):
    """Fetch ``url`` and return the complete response body.

    Args:
        url: Address of the page to download; passed to ``urllib.urlopen``.

    Returns:
        The value returned by ``read()`` on the opened response.
    """
    urlfile = urllib.urlopen(url)
    try:
        # Read the whole body at once; the finally clause makes sure the
        # response is closed even if the read itself raises.
        return urlfile.read()
    finally:
        urlfile.close()

def download(htmllist, path_template="D:\\pictures\\%s.jpg", retries=1):
    """Download every image URL that ``pic_re`` finds in ``htmllist``.

    Files are numbered consecutively starting at 1. The counter advances
    only after a successful download, so an image that ultimately fails
    leaves no gap in the numbering.

    Args:
        htmllist: Page source to search for image URLs.
        path_template: ``%``-style template for the target file path; it is
            formatted with the running file number. The default is the
            location that used to be hard-coded (``D:\\pictures\\<n>.jpg``).
        retries: Number of extra attempts after a
            ``urllib.ContentTooShortError``. The default of 1 keeps the
            previous "try once more" behaviour; negative values count as 0.

    Any exception other than ``urllib.ContentTooShortError`` propagates to
    the caller; ``urllib.urlcleanup()`` is still called in that case.
    """
    attempts = max(retries, 0) + 1
    i = 1
    try:
        for pic in pic_re.findall(htmllist):
            target = path_template % i
            for attempt in range(attempts):
                try:
                    urllib.urlretrieve(pic, target)
                except urllib.ContentTooShortError as e:
                    # Report the error only once the final attempt has
                    # failed, then move on to the next picture without
                    # consuming the current file number.
                    if attempt == attempts - 1:
                        print(e)
                else:
                    print("Download %s.jpg successfully." % i)
                    i += 1
                    break
    finally:
        # Always clear urllib's retrieval cache, even on unexpected errors.
        urllib.urlcleanup()
    

if __name__ == "__main__":
    # Script entry point: prompt for a page address, fetch that page and
    # save every image found in it.
    download(html(raw_input("Please input the URL:")))


 

posted @ 2014-10-20 22:37  迷途小毛驴  阅读(135)  评论(0)    收藏  举报