# 01 彼岸壁纸下载单页
# 案例一:彼岸图网4K壁纸
import requests
from selenium.webdriver.chrome.service import Service
from selenium import webdriver
from time import sleep
from lxml import etree
import os
# Output directory for the downloaded wallpapers (relative to the current
# working directory).
filename = '彼岸 单页测试动漫壁纸'
# exist_ok=True makes this a no-op when the directory already exists, which
# avoids the check-then-create race of os.path.exists() followed by os.mkdir().
os.makedirs(filename, exist_ok=True)

# Request headers carrying a desktop Chrome User-Agent string.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36',
}

# Initial URL: the listing page that is loaded and parsed for image entries.
url = 'https://pic.netbian.com/4kdongman/'
# Path to the local chromedriver executable.
# NOTE(review): this path is machine-specific; adjust it for the environment
# the script runs in.
s = Service(r'E:\Luffycity\爬虫案例实战操作\chromedriver.exe')
# Start a Chrome session driven by that chromedriver.
bro = webdriver.Chrome(service=s)
try:
    # Load the listing page in the browser.
    bro.get(url=url)
    # Fixed one-second pause before reading the DOM — presumably to give the
    # page time to finish rendering.
    sleep(1)
    # Capture the page's HTML source as a string.
    page_text_one = bro.page_source
finally:
    # Always shut down the browser and the chromedriver process, even when
    # loading the page fails, so no process is left behind after the run.
    bro.quit()

# Parse the captured HTML into an lxml tree that can be queried with XPath.
tree = etree.HTML(page_text_one)
# Characters that are unsafe in file names (path separators and the characters
# Windows rejects) are replaced with '_' so a scraped title can never break the
# write or point outside the output directory.
_unsafe_chars = str.maketrans({ch: '_' for ch in '\\/:*?"<>|'})

# Download the images referenced by the first four <li> entries of the list.
for i in range(1, 5):
    # Example of the resulting expression: //*[@id="main"]/div[3]/ul/li[1]
    li_list = tree.xpath('//*[@id="main"]/div[3]/ul/li[%d]' % i)
    for li in li_list:
        src_nodes = li.xpath('./a/img/@src')
        title_nodes = li.xpath('./a/b/text()')
        # An entry without an image or a title cannot be downloaded; skip it
        # instead of failing with IndexError on the [0] lookup.
        if not src_nodes or not title_nodes:
            print('----当前是第%d个---' % i, '缺少图片地址或标题,已跳过')
            continue

        # The src attribute is site-relative, so prefix it with the site root.
        img_src = 'https://pic.netbian.com' + src_nodes[0]
        # NOTE(review): '.png' is appended regardless of the format the server
        # actually returns — confirm this is intended.
        img_title = title_nodes[0].translate(_unsafe_chars) + '.png'

        # Fetch the raw image bytes. The timeout keeps a stalled connection
        # from hanging the script; raise_for_status() prevents an HTTP error
        # page from being written to disk as if it were an image.
        try:
            response = requests.get(url=img_src, headers=headers, timeout=10)
            response.raise_for_status()
        except requests.RequestException as exc:
            print('----当前是第%d个---' % i, img_title, '下载失败:', exc)
            continue

        # Destination path inside the output directory.
        file_path = os.path.join(filename, img_title)
        # The with-statement closes the file itself; no explicit close() needed.
        with open(file_path, 'wb') as f:
            f.write(response.content)
        print('----当前是第%d个---' % i, img_title, '下载完成')
# 本文来自博客园,作者:Chimengmeng,转载请注明原文链接:https://www.cnblogs.com/dream-ze/p/17177104.html