python3 爬虫 批量下载堆糖高清图片

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
 
'''@author = 'Mickyqing'  '''
'''@time   = '2018年9月20日09:20:12' '''
 
from urllib import request, parse
import re
import os

def getJpg(data):
    """Return the inner text of every ``<img ...>`` tag found in *data*.

    Each returned item is the raw text between ``<img`` and the next ``>``
    on the same line; the tag delimiters themselves are not included.
    """
    img_tag = re.compile(r'<img(.*?)>')
    return img_tag.findall(data)

def getResponse(url):
    """Open *url* and return the response object from ``request.urlopen``.

    The caller is responsible for reading and closing the returned object.
    Any exception raised by ``urlopen`` propagates to the caller unchanged.
    """
    # The original built a Request object here and never used it; urlopen
    # accepts the URL string directly, so the dead local has been removed.
    return request.urlopen(url)

def downImg(start):
    """Download the images found on one page of Duitang search results.

    Fetches the search page for the module-level ``codeSearchName`` at
    offset *start*, and for every ``<img>`` tag that carries a ``.jpeg``
    URL, an ``alt`` text and a ``data-rootid`` attribute, opens the
    matching blog page and saves the image referenced by its
    ``mbpho-img`` element. Files are written to a folder named after
    ``searchName`` next to this script, as ``<alt>_<counter>.jpg``.

    Reads the globals ``searchName``, ``codeSearchName`` and ``picNum``
    and increments the global counter ``n`` once per saved image.

    Returns:
        False as soon as ``n`` has reached ``picNum`` (the caller should
        stop paging), True otherwise.
    """
    global n  # the only global that is rebound here; the rest are read-only

    with getResponse("https://www.duitang.com/search/?kw=" + codeSearchName + "&type=feed&include_fields=top_comments,is_root,source_link,item,buyable,root_id,status,like_count,like_id,sender,album,reply_count,favorite_blog_id&_=1537349450707&start=" + str(start)) as http_response:
        page_html = http_response.read().decode('utf-8')

    # Anchor the target folder to the script's own directory (where the
    # top-level code creates it) instead of the current working directory,
    # and build it with os.path.join so it is not tied to '\\' separators.
    save_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), searchName)
    os.makedirs(save_dir, exist_ok=True)

    for tag in getJpg(page_html):
        # The dot before "jpeg" is escaped so it only matches a literal '.'.
        path = re.findall(r'http.+?\.jpeg', tag)
        name = re.findall(r'alt="(.*?)"', tag)
        imgid = re.findall(r'data-rootid="(.*?)"', tag)

        if not (path and name and imgid):
            continue

        with getResponse('https://www.duitang.com/blog/?id=' + imgid[0]) as sub_http_response:
            blog_html = sub_http_response.read().decode('utf-8')
        sub_path = re.findall(r'id="mbpho-img".+?src="(https.+?\.jpeg)', blog_html)

        if not sub_path:
            continue

        print(sub_path[0])
        n = n + 1

        # The alt text is free-form; replace characters that are not valid
        # in file names so urlretrieve does not fail on them.
        safe_name = re.sub(r'[\\/:*?"<>|]', '_', name[0])
        target = os.path.join(save_dir, safe_name + '_' + str(n) + '.jpg')
        request.urlretrieve(sub_path[0], target)

        if picNum <= n:
            return False
    return True

def downPage(picNum):
    """Walk the search result pages until enough images were downloaded.

    Calls ``downImg`` with offsets 0, 24, 48, ... and stops early as soon
    as ``downImg`` returns False. The number of pages visited is at most
    ``ceil(picNum * 2 / 24)``.

    Args:
        picNum: number of images the user asked for.
    """
    # Ceiling division: the original int(picNum * 2 / 24) truncated, so any
    # request for fewer than 12 images yielded zero pages and nothing was
    # downloaded. The factor 2 is kept from the original (presumably a
    # margin for entries that produce no download -- TODO confirm).
    pageNum = (picNum * 2 + 23) // 24
    for i in range(pageNum):
        if not downImg(24 * i):
            break


# Module-level state read by downImg(): the running download counter, the
# raw search keyword, the requested image count and the URL-quoted keyword.
n = 0
searchName = input('请输入搜索词:')
picNum = int(input('请输入想下载的图片个数:'))
codeSearchName = parse.quote(searchName)

# The target folder sits next to this script and is named after the keyword.
# os.path.join replaces the hard-coded '\\' separator.
dirs = os.path.join(os.path.dirname(os.path.realpath(__file__)), searchName)

# exist_ok=True replaces the separate exists() check followed by mkdir().
os.makedirs(dirs, exist_ok=True)

downPage(picNum)

 

 

posted on 2018-09-20 11:39  菜心儿  阅读(334)  评论(0)    收藏  举报

导航