使用 requests_html 批量下载图片

# 自学, 不足之处还请大佬不吝指导,在此谢过.

from requests_html import HTMLSession
from lxml import etree
import re
import  urllib3
urllib3.disable_warnings()

# Batch image downloader: fetch the listing page, render it with the
# headless browser bundled with requests_html, then save the picture
# referenced by every element carrying the CSS class "img".

url = "https://www.q.com/feature/travel/2527.html"
header = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.93 Safari/537.36 Edg/90.0.818.56',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
    'Accept-Encoding': 'gzip, deflate, br',
    'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6'
}

# Directory the pictures are written to (must already exist).
save_dir = "D:/Pictures/OP/"

# Characters that Windows does not allow in a file name; the image title comes
# from the page's alt text, so it has to be sanitised before it is used as one.
illegal_filename_chars = re.compile(r'[\\/:*?"<>|]')

session = HTMLSession()
# TLS certificate verification is switched off for every request made through
# this session.
session.verify = False

r = session.get(url, headers=header)
try:
    # render() executes the page's JavaScript in a headless browser; on first
    # use it downloads Chromium automatically.
    r.html.render()

    # Equivalent of the jQuery selector $('.img')
    images = r.html.find('.img')

    for i in images:
        anchors = i.xpath('.//a')
        pictures = i.xpath('.//a/img')
        if not anchors or not pictures:
            # A malformed entry must not abort the whole batch.
            print("下载中出现异常:%s" % "未找到链接或图片节点")
            continue

        # Detail-page link, e.g. /imgbuy/105-0128.html
        links = anchors[0].attrs.get('href')
        if not links:
            print("下载中出现异常:%s" % "未找到链接地址")
            continue
        # Absolute form, e.g. https://www.q.com/imgbuy/105-0128.html
        # (not used below; handy when debugging).
        buy_links = 'https://www.q.com' + links
        # Page name, e.g. 105-0128.html; fall back to the last path segment
        # when the link does not live under /imgbuy/.
        matched = re.search(r"/imgbuy/(.+?)$", links)
        htmllink = matched.group(1) if matched else links.rsplit('/', 1)[-1]
        # print(links, buy_links, htmllink)

        # Title: the page name is appended so that two pictures sharing the
        # same (or an empty) alt text do not overwrite each other on disk.
        title = (pictures[0].attrs.get('alt', "未获取到标题") or "未取到标题") + htmllink

        # Image address, read from the img element's "lowsrc" attribute.
        src = pictures[0].attrs.get('lowsrc')
        if not src:
            print("下载中出现异常:%s" % "未找到图片地址")
            continue
        src = str(src)
        print(title, src)

        try:
            # Download and save the picture.
            r_save_pic = session.get(src, headers=header)
            # Do not store an HTTP error page under a .jpg name.
            r_save_pic.raise_for_status()
            file_name = illegal_filename_chars.sub('_', title)
            with open(save_dir + "%s.jpg" % file_name, "wb") as fp:
                fp.write(r_save_pic.content)
        except Exception as msg:
            # Best effort: report the failure and carry on with the next image.
            print("下载中出现异常:%s" % str(msg))
finally:
    r.close()
    # Closing the session also shuts down the browser started by render().
    session.close()

  

posted @ 2021-05-14 15:48  Aliwall  阅读(347)  评论(0)  编辑  收藏  举报