# 01 彼岸壁纸下载单页
#
# 案例一:彼岸图网4K壁纸

import requests
from selenium.webdriver.chrome.service import Service
from selenium import webdriver
from time import sleep
from lxml import etree
import os

# Create the output directory for the downloaded images if it is missing.
# makedirs(exist_ok=True) replaces the exists()/mkdir() pair, which could
# fail if the directory appeared between the check and the creation.
filename = '彼岸 单页测试动漫壁纸'
os.makedirs(filename, exist_ok=True)
# Browser-like request headers (used for the image downloads further down)
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36'
}
# Listing page to scrape
url = 'https://pic.netbian.com/4kdongman/'
# Path to the local chromedriver executable
s = Service(r'E:\Luffycity\爬虫案例实战操作\chromedriver.exe')
# Start a Chrome session through that driver
bro = webdriver.Chrome(service=s)
try:
    # Load the listing page in the browser
    bro.get(url=url)

    # Fixed pause so the page has time to render before it is read
    sleep(1)
    # Snapshot of the rendered HTML (page_source)
    page_text_one = bro.page_source
finally:
    # Only the HTML snapshot is used afterwards; quit the session so the
    # Chrome and chromedriver processes do not outlive the script, even
    # when loading the page fails.
    bro.quit()

# Parse the rendered listing page into an element tree
tree = etree.HTML(page_text_one)
# Image titles come from the page and are used as file names, so characters
# that Windows does not allow in a file name are replaced with '_'.
invalid_chars = str.maketrans({ch: '_' for ch in '\\/:*?"<>|'})
# Single-page test: only list entries li[1] .. li[4] are downloaded
for i in range(1, 5):
    # e.g. //*[@id="main"]/div[3]/ul/li[1]
    li_list = tree.xpath('//*[@id="main"]/div[3]/ul/li[%d]' % i)
    for li in li_list:
        src_nodes = li.xpath('./a/img/@src')
        title_nodes = li.xpath('./a/b/text()')
        # Skip entries without an image or a title instead of failing with
        # an IndexError on the empty xpath result.
        if not src_nodes or not title_nodes:
            continue
        # The page uses site-relative src values; prefix the site root
        img_src = 'https://pic.netbian.com' + src_nodes[0]
        # NOTE(review): the file is always named .png, whatever format the
        # server actually returns -- confirm this is intended.
        img_title = title_nodes[0].translate(invalid_chars) + '.png'
        try:
            # A timeout keeps one stalled connection from hanging the script
            response = requests.get(url=img_src, headers=headers, timeout=10)
            # Do not save an HTTP error page as if it were an image
            response.raise_for_status()
        except requests.RequestException as exc:
            print('----当前是第%d个---' % i, img_src, '下载失败', exc)
            continue
        # Image payload is binary, hence .content rather than .text
        img_data = response.content
        # Destination path inside the output directory
        file_path = os.path.join(filename, img_title)
        # The with-block closes the file; no explicit close() is needed
        with open(file_path, 'wb') as f:
            f.write(img_data)
        print('----当前是第%d个---' % i, img_title, '下载完成')
# posted @ 2023-03-03 21:46  Chimengmeng  阅读(59)  评论(0)    收藏  举报