Python 之爬取贴吧图片

#coding=utf-8
import re
import urllib.request

def getHtml(url):
    """Fetch ``url`` and return the response body as text.

    The body is read in full and decoded as UTF-8.

    Args:
        url: Any URL that ``urllib.request.urlopen`` accepts.

    Returns:
        The decoded response body as a ``str``.

    Raises:
        UnicodeDecodeError: If the body is not valid UTF-8.
        Exception: Whatever ``urllib.request.urlopen`` raises for the
            request itself is propagated unchanged.
    """
    # The context manager closes the response (and its underlying
    # connection) even when read() raises, so nothing is leaked.
    with urllib.request.urlopen(url) as page:
        raw = page.read()
    return raw.decode('UTF-8')

def getImg(html, save_pattern=r'D:\img\hardaway%s.jpg'):
    """Download every ``BDE_Image`` ``.jpg`` referenced in ``html``.

    The markup is scanned for ``img class="BDE_Image" src="....jpg"``
    and each captured URL is downloaded in order of appearance.

    Args:
        html: Page markup to scan, as a ``str``.
        save_pattern: ``%``-style template for the destination file name.
            It receives the zero-based index of the image. The default
            is the path the script has always written to.

    Returns:
        None. The function is used for its side effect of writing files.
    """
    import os  # local import: keeps this block self-contained

    imgre = re.compile(r'img class="BDE_Image" src="(.+?\.jpg)"')
    for num, imgurl in enumerate(imgre.findall(html)):
        target = save_pattern % num
        # urlretrieve does not create missing directories, so make sure
        # the destination folder exists before downloading into it.
        folder = os.path.dirname(target)
        if folder:
            os.makedirs(folder, exist_ok=True)
        urllib.request.urlretrieve(imgurl, target)

# Run the scrape only when executed as a script, so importing this module
# does not trigger network requests or file writes.
if __name__ == "__main__":
    html = getHtml("http://www.27270.com/tag/1130.html")
    # getImg returns None (it only saves files), so its result is not printed.
    getImg(html)

 

posted on 2018-03-02 00:14  杨学友  阅读(145)  评论(0)    收藏  举报

导航