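# Python course project: novel reader. This script crawls novel listings (author, title, link) so the novels can be downloaded later.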
# 导入所需的库
import os
import time

from lxml import html
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
# Scrape the listing page: open it in Edge, scroll until the page stops
# moving, wait for the book entries, then save each book's author, title and
# link to a text file.

# Create an Edge WebDriver instance.
driver = webdriver.Edge()
try:
    # Open the listing page.
    driver.get("https://www.bqg88.cc/finish/")

    # Scroll down in 500px steps. The loop stops once a scroll request no
    # longer changes the scroll offset, which is taken to mean that the bottom
    # of the page has been reached.
    roll = 500
    while True:
        h_before = driver.execute_script('return document.documentElement.scrollTop')
        time.sleep(1)
        driver.execute_script(f'window.scrollTo(0,{roll})')
        # Pause so the page can react to the scroll before measuring again
        # (presumably the listing loads more entries while scrolling).
        time.sleep(1)
        h_after = driver.execute_script('return document.documentElement.scrollTop')
        roll += 500
        print(h_after, h_before)
        if h_before == h_after:
            break

    # Wait (up to 2 seconds) until at least one book entry is present.
    wait = WebDriverWait(driver, 2)
    wait.until(EC.presence_of_element_located((By.XPATH, '//div[@class="item"]')))

    # Grab the rendered HTML; the browser is not needed after this point.
    page_source = driver.page_source
finally:
    # Always close the browser, even if loading, scrolling or waiting failed;
    # otherwise the Edge process would be left running.
    driver.quit()

# Parse the HTML with lxml.
tree = html.fromstring(page_source)

# Extract author, title and link entry by entry. Querying three independent
# lists and zipping them would silently pair values from different books as
# soon as one entry lacks a field.
authors = []
book_titles = []
links = []
for item in tree.xpath('//div[@class="item"]'):
    item_links = item.xpath('./dl/dt/a/@href')
    if not item_links:
        # Without a link there is nothing useful to record for this entry.
        continue
    item_titles = item.xpath('./dl/dt/a/text()')
    item_authors = item.xpath('./dl/dt/span/text()')
    links.append(item_links[0])
    book_titles.append(item_titles[0] if item_titles else "")
    authors.append(item_authors[0] if item_authors else "")

# Write the scraped records to the output file.
file_path = "..//分类小说链接记录//完本.txt"
# Make sure the target directory exists so the scraped data is not lost to a
# FileNotFoundError at the very last step.
os.makedirs(os.path.dirname(file_path), exist_ok=True)
with open(file_path, "w", encoding="utf-8") as f:
    for author, title, link in zip(authors, book_titles, links):
        f.write(f"作者: {author.strip()}\n")
        f.write(f"书名: {title.strip()}\n")
        f.write(f"链接: {link}\n\n")