抓取 URL 中的图片并保存到本地的 demo

 

import requests
from lxml import etree
from furl import furl

# Page to scrape.
url = 'https://dsd.com'
# A timeout keeps the script from hanging forever on an unresponsive host
# (same 5-second limit that download() applies to each image request).
html = requests.get(url, timeout=5).text

# Regex alternative for pulling "objURL" values straight out of the raw text:
#   re.findall('"objURL":"(.*?)",', html, re.S)
element = etree.HTML(html)
# XPath quick reference:
#   //div/img/@src                        src of every <img> directly under a <div>
#   li[contains(@title, 'Province')]      attribute value contains a substring
#   [@href and @lmv='TV series']          has @href AND @lmv equals a value
#   [@href|@lmv]                          has @href OR @lmv
#   item[@company_name='<value>' and @sent='0']
#                                         two attribute equality tests combined
#   //div[contains(@class,"a") and contains(@class,"b")]
#                                         class attribute contains both tokens
#   //div[contains(concat(' ', @class, ' '), 'demo')]
#                                         match against the space-padded class list
# NOTE(review): <img> elements normally have no text content, so './text()'
# presumably yields an empty list for every image; '@src' may have been
# intended here -- confirm before relying on imgs.
imgs = [img.xpath('./text()')
        for img in element.xpath('//div[@class="reader-container"]/div//img')]


# Inline sample markup with two slide pages, used instead of the fetched page.
# The first <img> carries its address in src, the second one in data-src.
html = '''<div class="mod flow-ppt-mod">
<div class="page-1 ppt-page-item  batch-50-1" id="pageNo-1">
<div class="ppt-image-wrap ppt-16-9">
<img src="https://sdsd.com?pn=1" alt="">
</div>
</div>
<div class="page-2 ppt-page-item  batch-50-1" id="pageNo-2">
<div class="ppt-image-wrap ppt-16-9">
<img data-src="https://sdsd.com?pn=2">
</div>
</div>'''


# XPath quick reference:
#   //div/img/@src                        src of every <img> directly under a <div>
#   li[contains(@title, 'Province')]      attribute value contains a substring
#   [@href and @lmv='TV series']          has @href AND @lmv equals a value
#   [@href|@lmv]                          has @href OR @lmv
#   item[@company_name='<value>' and @sent='0']
#                                         two attribute equality tests combined
#   //div[contains(@class,"a") and contains(@class,"b")]
#                                         class attribute contains both tokens
#   //div[contains(concat(' ', @class, ' '), 'demo')]
#                                         match against the space-padded class list
element = etree.HTML(html)

# For every <img> below any <div>, collect its src values followed by its
# data-src values, flattened into one list in document order of the images.
urls = [
    link
    for node in element.xpath('//div//img')
    for link in [*node.xpath('./@src'), *node.xpath('./@data-src')]
]


def download(url, save_dir=r'C:\Users\Semi-Luy\Desktop\ppt'):
    """Download one image and save it as ``<pn>.jpg`` inside *save_dir*.

    The file name is taken from the ``pn`` query parameter of *url*.

    Args:
        url: Address of the image; must carry a ``pn`` query parameter.
        save_dir: Directory the image is written to; it is created when it
            does not exist yet. Defaults to the folder that used to be
            hard-coded in this function.

    Returns:
        None. A failed request is reported on stdout and the image is skipped.

    Raises:
        KeyError: If *url* has no ``pn`` query parameter.
    """
    import os  # local import: only this function needs it

    try:
        pic = requests.get(url, timeout=5)
        # Treat HTTP error statuses as failures so that an error page is
        # never written to disk as if it were a .jpg.
        pic.raise_for_status()
    except requests.exceptions.RequestException:
        # Covers connection errors, timeouts and bad HTTP statuses.
        print('图片无法下载')
        # Stop here: there is no response to save. Previously execution fell
        # through and crashed on the unbound ``pic`` variable.
        return

    # Build the target path. furl parses the query string for us, which
    # replaces manually splitting the URL on '?', '&' and '='.
    f = furl(url)
    file_name = f.args['pn'] + '.jpg'
    os.makedirs(save_dir, exist_ok=True)
    path = os.path.join(save_dir, file_name)

    # The context manager closes the file even if the write fails.
    with open(path, 'wb') as fp:
        fp.write(pic.content)

# Announce the start, then process the collected image URLs one by one,
# echoing each address before handing it to download().
print("开始下载图片:\r\n")
for url in urls:
    print(url)
    download(url)

  

posted on 2021-11-30 14:46  iUpoint  阅读(106)  评论(0)  编辑  收藏  举报

导航