"""
模拟浏览器登录-处理cookie
防盗链处理-抓取梨视频
代理-防止被封IP
"""
# http://dushu.baidu.com/api/pc/getCatalog?data={"book_id":"4306063500"}
#
# http://dushu.baidu.com/api/pc/getChapterContent?data={"book_id":"4306063500","cid":"4306063500|1569782244","need_bookinfo":1}
# Host: dushu.baidu.com
# User-Agent: Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:88.0) Gecko/20100101 Firefox/88.0
import requests
import asyncio
import aiohttp
import aiofiles
import json
async def aiodownload(cid, b_id, title):
    """Download one chapter and write its text to ``.//novel//<title>``.

    Args:
        cid: Chapter id; combined with ``b_id`` as ``"<b_id>|<cid>"`` in the request.
        b_id: Book id the chapter belongs to.
        title: Chapter title, used verbatim as the output file name.

    Raises:
        KeyError: If the response JSON has no ``data.novel.content`` entry.
    """
    import os  # local import: only needed to make sure the output directory exists

    # The endpoint takes its arguments as a JSON document in the ``data``
    # query parameter, so the dict has to be serialized first.
    data = json.dumps({
        "book_id": b_id,
        "cid": f"{b_id}|{cid}",
        "need_bookinfo": 1,
    })
    url = f"http://dushu.baidu.com/api/pc/getChapterContent?data={data}"

    # ClientSession must be instantiated: the bare class is not an async
    # context manager, so ``async with aiohttp.ClientSession as ...`` fails.
    async with aiohttp.ClientSession() as session:
        async with session.get(url) as resp:
            dic = await resp.json()

    # Create the target directory up front so the write below cannot fail
    # with FileNotFoundError on a fresh checkout.
    output_dir = './/novel//'
    os.makedirs(output_dir, exist_ok=True)

    # aiofiles keeps the file write asynchronous as well.
    async with aiofiles.open(output_dir + title, mode="w", encoding="utf-8") as f:
        await f.write(dic['data']['novel']['content'])
async def get_catalog(main_links, book_id=None):
    """Fetch the chapter catalog and download all chapters concurrently.

    Args:
        main_links: Catalog API URL. Its JSON response is read at
            ``data.novel.items``; each item supplies ``title`` and ``cid``.
        book_id: Book id forwarded to ``aiodownload``. When omitted, the
            module-level ``b_id`` set by the script entry point is used.
    """
    if book_id is None:
        # Backward-compatible fallback to the global defined under __main__.
        book_id = b_id

    # A single blocking request up front; the chapter downloads that follow
    # are the part that runs concurrently.
    resp = requests.get(main_links)
    dic = resp.json()

    # Wrap each coroutine in a Task: asyncio.wait() no longer accepts bare
    # coroutine objects on Python 3.11+.
    tasks = [
        asyncio.create_task(aiodownload(item['cid'], book_id, item['title']))
        for item in dic['data']['novel']['items']
    ]

    # asyncio.wait() raises ValueError on an empty collection, so skip it
    # when the catalog lists no chapters.
    if tasks:
        await asyncio.wait(tasks)
if __name__ == '__main__':
    # Book to download; get_catalog reads this module-level name.
    b_id = '4306063500'
    main_links = f'http://dushu.baidu.com/api/pc/getCatalog?data={{"book_id":"{b_id}"}}'
    # get_catalog is a coroutine function, so calling it directly would only
    # create a coroutine object; asyncio.run drives it on an event loop.
    asyncio.run(get_catalog(main_links))