增补博客第十二篇 python大作业小说阅读器（1）爬取

Python 大作业：小说阅读器。这段代码用于爬取“完本”分类页面中小说的作者、书名和链接，并将结果保存到文本文件中。
# 导入所需的库
import os
import time  # used for the pauses between scroll steps

from lxml import html
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

# Start an Edge WebDriver session. Everything that talks to the browser runs
# inside try/finally so the browser is always shut down, even when scrolling
# or waiting raises.
driver = webdriver.Edge()
try:
    # Open the listing page.
    driver.get("https://www.bqg88.cc/finish/")

    # Scroll down in 500-pixel steps until the scroll position stops changing,
    # i.e. the bottom of the page has been reached (presumably so that lazily
    # loaded entries get rendered -- confirm against the site).
    roll = 500
    while True:
        h_before = driver.execute_script('return document.documentElement.scrollTop')
        time.sleep(1)
        driver.execute_script(f'window.scrollTo(0,{roll})')
        time.sleep(1)
        h_after = driver.execute_script('return document.documentElement.scrollTop')
        roll += 500
        print(h_after, h_before)
        if h_before == h_after:
            break

    # Wait up to 2 seconds for at least one item container to be present;
    # raises selenium's TimeoutException otherwise.
    wait = WebDriverWait(driver, 2)
    wait.until(EC.presence_of_element_located((By.XPATH, '//div[@class="item"]')))

    # Snapshot the rendered HTML; the browser is not needed after this point.
    page_source = driver.page_source
finally:
    # Close the browser on both the success and the failure path.
    driver.quit()

# Parse the HTML snapshot with lxml.
tree = html.fromstring(page_source)

# Extract author, title and link per <dt> entry instead of running three
# independent document-wide queries and zipping them: with separate queries a
# single entry lacking a <span> or <a> would shift every following record and
# pair authors with the wrong books.
records = []
for entry in tree.xpath('//div[@class="item"]/dl/dt'):
    entry_titles = entry.xpath('./a/text()')
    entry_links = entry.xpath('./a/@href')
    if not entry_titles or not entry_links:
        # Without a title or a link there is nothing useful to record.
        continue
    entry_authors = entry.xpath('./span/text()')
    author = entry_authors[0].strip() if entry_authors else ""
    records.append((author, entry_titles[0].strip(), entry_links[0]))

# Write the scraped records to the output file (overwriting any previous run).
file_path = "..//分类小说链接记录//完本.txt"
# Create the target directory first so a missing folder does not throw away
# the scraped data with a FileNotFoundError.
os.makedirs(os.path.dirname(file_path), exist_ok=True)
with open(file_path, "w", encoding="utf-8") as f:
    for author, title, link in records:
        f.write(f"作者: {author}\n")
        f.write(f"书名: {title}\n")
        f.write(f"链接: {link}\n\n")

posted @ 2024-06-14 12:19 财神给你送元宝 阅读(33) 评论(0) 收藏 举报

刷新页面 返回顶部

youxiandechilun

增补博客第十二篇 python大作业小说阅读器（1）爬取

公告

youxiandechilun

增补博客 第十二篇 python大作业小说阅读器（1）爬取

公告

增补博客第十二篇 python大作业小说阅读器（1）爬取