第五关：网页图片的爬取及本地保存

点击查看代码
import requests
from lxml import etree
import os

# Download every image shown on the spiderbuf "s05" playground page into a
# local folder, naming each file after the last path segment of its URL.

# Folder that receives the images. exist_ok=True replaces the separate
# os.path.exists() check, which could race with another process creating it.
save_dir = './美图'
os.makedirs(save_dir, exist_ok=True)

base_url = 'https://www.spiderbuf.cn'
url = f'{base_url}/playground/s05'
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36 Edg/119.0.0.0"
}
# Seconds to wait on each request; without a timeout requests blocks forever
# on a stalled connection.
timeout = 10

res = requests.get(url=url, headers=headers, timeout=timeout)
# Stop on an HTTP error instead of parsing an error page as the listing.
res.raise_for_status()
root = etree.HTML(res.text)
divs = root.xpath('//div[@class="col-sm-4"]')
for div in divs:
    # Iterate the src attributes one by one: a div without an <img> is simply
    # skipped (joining an empty result used to yield the bare site URL), and a
    # div with several images no longer produces one concatenated bogus URL.
    for img_src in div.xpath('./img/@src'):
        # The page is assumed to use site-root-relative paths ("/static/...").
        src = base_url + img_src
        title = src.split('/')[-1]
        if not title:
            # A URL ending in "/" gives no usable file name.
            continue
        print(src, title)
        try:
            meitu_res = requests.get(src, headers=headers, timeout=timeout)
            # Never write an HTTP error body to disk as if it were an image.
            meitu_res.raise_for_status()
        except requests.RequestException as exc:
            # One broken image should not abort the remaining downloads.
            print(f'skipped {src}: {exc}')
            continue
        with open(os.path.join(save_dir, title), 'wb') as f:
            f.write(meitu_res.content)
print('下载完毕')


posted @ 2024-10-08 09:35  神仙不在  阅读(20)  评论(0)    收藏  举报