第22章 Day26-Day27 DrissionPage之实战项目
案例一
页面:
https://fuwu.nhsa.gov.cn/nationalHallSt/#/search/disease-diagnosis?code=90000&flag=false&gbFlag=true
逆向接口:
https://fuwu.nhsa.gov.cn/ebus/fuwu/api/nthl/api/CommQuery/queryRtalPhacBInfo
案例二
页面:
https://app.diandian.com/
逆向接口:
https://api.diandian.com/pc/app/v1/user/favorite/app
案例三
页面:
https://www.cnhnb.com/
逆向接口:
https://appapi.cnhnb.com/recq/api/transform/supply/v501/index
点击查看小红书笔记.py代码
import time
from DrissionPage import ChromiumPage
from DataRecorder import Recorder
# Module-level recorder used by handler(); it is constructed with the path "data.xlsx".
recorder = Recorder("data.xlsx")
# Presumably turns off the recorder's console messages -- confirm against the DataRecorder docs.
recorder.set.show_msg(False)
def find_first_key_value(data, target_key):
    """Return the first non-``None`` value stored under ``target_key``.

    The structure is walked depth-first, following dict and list iteration
    order. A matching key whose value is ``None`` is treated as "not found",
    so the search carries on with the remaining keys and nested containers
    instead of stopping at that dict.

    :param data: Arbitrarily nested dicts/lists; any other type yields ``None``.
    :param target_key: Dictionary key to look for.
    :return: The first non-``None`` value found, or ``None`` if there is none.
    """
    if isinstance(data, dict):
        for key, val in data.items():
            # Only a real value ends the search; a None match must not hide
            # a usable value that sits deeper or later in the same dict.
            if key == target_key and val is not None:
                return val
            # Descend into the value (a no-op for scalars and None).
            ret = find_first_key_value(val, target_key)
            if ret is not None:
                return ret
    elif isinstance(data, list):
        for item in data:
            ret = find_first_key_value(item, target_key)
            if ret is not None:
                return ret
    return None
def handler(page, keyword):
    """Search Xiaohongshu for ``keyword`` and record each result card's details.

    The search page is opened, then for up to 50 rounds every not-yet-seen
    card is clicked, the matching "web/v1/feed" response is captured, selected
    fields are extracted with ``find_first_key_value`` and written through the
    module-level ``recorder``. After each round the page is scrolled so more
    cards can load.

    :param page: Browser page object providing ``get``, ``eles``, ``ele``,
        ``listen`` and ``scroll`` (a DrissionPage ``ChromiumPage`` in this script).
    :param keyword: Search term; surrounding whitespace (e.g. the newline left
        by ``readlines()``) is removed and a blank keyword is skipped.
    """
    # Function-scope import so the block does not depend on the file header.
    from urllib.parse import quote

    keyword = keyword.strip()
    if not keyword:
        # Nothing to search for (e.g. an empty line in the keyword file).
        return

    # Percent-encode the keyword so characters such as '&', '#' or spaces
    # cannot corrupt the query string.
    page.get(
        "https://www.xiaohongshu.com/search_result"
        f"?keyword={quote(keyword)}&source=unknown&type=51"
    )
    time.sleep(3)

    seen = set()  # data-index values of cards that were already handled
    for _ in range(50):
        try:
            cards = page.eles('@class=note-item')
            # (1) Listen for the card-detail API; restarted every round.
            page.listen.start("web/v1/feed")
            for card in cards:
                # Skip cards handled in an earlier round.
                index = card.attr("data-index")
                if index in seen:
                    continue
                seen.add(index)
                print(card)
                # Open the card.
                card.ele('@tag()=img').click(by_js=True)
                # (2) Wait for the detail response.
                res = page.listen.wait(count=1, timeout=1, fit_count=True)
                if not res:
                    # A falsy result means nothing was captured in time.
                    # Abandon this round (as the former AttributeError did)
                    # but report the real cause.
                    print("error:::", "no web/v1/feed response captured for card", index)
                    break
                # (3) Read the payload and pull out the fields of interest.
                data = res.response.body
                print("data:::", data)
                row = {
                    "博主昵称": find_first_key_value(data, "nickname"),
                    "标题": find_first_key_value(data, "title"),
                    "详情": find_first_key_value(data, "desc"),
                    "评论数": find_first_key_value(data, "comment_count"),
                    "点赞数": find_first_key_value(data, "liked_count"),
                }
                # Write the row out through the recorder.
                recorder.add_data(row)
                recorder.record()
                # Close the detail overlay and pause before the next card.
                close_btn = page.ele('@class=close close-mask-dark')
                close_btn.click()
                time.sleep(3)
        except Exception as e:
            # Best-effort scraping: report the problem and move on to the next round.
            print("error:::", e)
        finally:
            # Scroll up a little, then to the bottom, pausing after each step.
            page.scroll.up(100)
            time.sleep(1)
            page.scroll.to_bottom()
            time.sleep(1)
def main():
    """Read keywords from "关键词.txt" and scrape Xiaohongshu for each one.

    The file is read one keyword per line. Surrounding whitespace (including
    the trailing newline) is stripped and blank lines are dropped so they do
    not produce malformed or empty searches. The browser is opened on the
    explore page and the script blocks on ``input`` until the user confirms,
    then ``handler`` runs once per keyword.
    """
    with open("关键词.txt", mode="r", encoding="utf8") as f:
        keyword_list = [line.strip() for line in f if line.strip()]
    # Create the browser driver object.
    page = ChromiumPage()
    page.get("https://www.xiaohongshu.com/explore")
    # Block until the operator presses Enter (the prompt asks to wait for login).
    input("等待登录")
    for keyword in keyword_list:
        handler(page, keyword)
# Script entry point. There is no __main__ guard, so this also runs on import.
main()
点击查看JD.py代码
import json
import time
from DrissionPage import ChromiumPage
from DataRecorder import Recorder
# Module-level recorder used by main(); it is constructed with the path "JD.xlsx".
recorder = Recorder("JD.xlsx")
# Presumably turns off the recorder's console messages -- confirm against the DataRecorder docs.
recorder.set.show_msg(False)
def find_key_val(data, target_key, max_count=1):
    """Collect values stored under ``target_key`` anywhere in nested JSON data.

    The structure is walked depth-first in dict/list iteration order. A
    matched value is collected and then searched itself, so nested
    occurrences of the same key are found too. The walk stops as soon as
    ``max_count`` values have been collected, so the result never exceeds
    that cap.

    :param data: A dict, a list, or a JSON string (decoded automatically).
    :param target_key: Dictionary key to look for.
    :param max_count: Maximum number of values to return. ``None`` or a
        negative number means no limit; ``0`` returns an empty list.
    :return: List of matched values in discovery order; an empty list when
        nothing matches or when ``data`` is a string that is not valid JSON.
    """
    results = []
    # (1) Decode JSON text into Python objects.
    if isinstance(data, str):
        try:
            data = json.loads(data)
        except json.JSONDecodeError:
            return results

    unlimited = max_count is None or max_count < 0

    def _full():
        """Tell whether the result cap has been reached."""
        return not unlimited and len(results) >= max_count

    def _search(node):
        """Walk ``node``; return True once the cap is hit so callers stop too."""
        if _full():
            return True
        if isinstance(node, dict):
            for key, val in node.items():
                if key == target_key:
                    results.append(val)
                    if _full():
                        return True
                # Descend into every value, matched or not.
                if _search(val):
                    return True
        elif isinstance(node, list):
            for item in node:
                if _search(item):
                    return True
        return False

    _search(data)
    return results
def main():
    """Scrape the reviews of one JD product page into the module-level recorder.

    A listener for "client.action" requests is started before the product
    page is loaded, then the "全部评价" element is clicked. Every captured
    response is searched for up to 11 "commentInfo" objects, each of which
    becomes one recorded row. After each response the review container is
    scrolled to the bottom. The loop ends when no response arrives within
    10 seconds.
    """
    page = ChromiumPage()
    # Start listening before navigation so responses from page load are captured too.
    page.listen.start("client.action")
    url = "https://item.jd.com/100006466663.html"
    page.get(url)

    # Look the element up once instead of querying the page twice.
    all_reviews = page.ele('@text()=全部评价')
    if not all_reviews:
        print("没有等待到元素!")
        return
    all_reviews.click(by_js=True)

    while True:
        # Wait for the next matching response; a falsy result means timeout.
        res = page.listen.wait(1, 10, fit_count=True)
        if not res:
            break
        # Read the payload.
        data = res.response.body
        print("data:::", data)
        added = False
        for comment_info in find_key_val(data, "commentInfo", 11):
            # Skip matches that are not objects; .get would fail on them.
            if not isinstance(comment_info, dict):
                continue
            recorder.add_data({
                "用户名": comment_info.get("userNickName"),
                "评论时间": comment_info.get("commentDate"),
                "评论内容": comment_info.get("commentData"),
                "评分": comment_info.get("commentScore"),
            })
            added = True
        if added:
            recorder.record()
        # Scroll the review list and wait.
        page.ele('@class=_rateListContainer_1ygkr_45').scroll.to_bottom()
        time.sleep(3)  # throttle requests to avoid getting the account banned
# Script entry point. There is no __main__ guard, so this also runs on import.
main()
点击查看tools.py代码
import json
from typing import Any, List, Optional, Union
def find_key_values(
    data: Union[dict, list, str],
    target_key: str,
    max_count: Optional[int] = None,
    return_first: bool = False
) -> Union[List[Any], Any, None]:
    """
    Recursively look up the values stored under a key in JSON-like data.

    The structure is walked depth-first in dict/list iteration order. A
    matched value is collected and then searched itself, so nested
    occurrences of the same key are found as well.

    :param data: JSON data as a dict, a list, or a JSON string (decoded
        automatically).
    :param target_key: Key to search for.
    :param max_count: Maximum number of values to return. ``None`` means all;
        ``0`` or a negative number yields an empty list. Ignored when
        ``return_first`` is true.
    :param return_first: Return only the first match (takes precedence over
        ``max_count``).
    :return: With ``return_first=True`` the first matched value or ``None``;
        otherwise a list of matched values (empty when nothing matches or
        when ``data`` is a string that is not valid JSON).
    """
    # Decode string input as JSON.
    if isinstance(data, str):
        try:
            data = json.loads(data)
        except json.JSONDecodeError:
            return None if return_first else []

    results: List[Any] = []

    def _limit_reached() -> bool:
        """Tell whether enough values have been collected."""
        if return_first:
            return bool(results)
        # Compare against None explicitly so that max_count=0 is an actual
        # limit rather than being mistaken for "no limit".
        return max_count is not None and len(results) >= max_count

    def _search(current_data: Any) -> bool:
        """Walk the data; return True to make every caller stop early."""
        if _limit_reached():
            return True
        if isinstance(current_data, dict):
            for key, value in current_data.items():
                if key == target_key:
                    results.append(value)
                    if _limit_reached():
                        return True
                if _search(value):  # descend into the value
                    return True
        elif isinstance(current_data, list):
            for item in current_data:
                if _search(item):  # descend into the list item
                    return True
        return False

    _search(data)
    if return_first:
        return results[0] if results else None
    return results
# 测试1
# data = {"a": 1,
# "b":{
# "a": 2}
# }
# result = find_key_values(data, "a", return_first=True)
# print(result) # 输出: 1
# 测试2
# data = {"a": 1, "b": {"a": 2}}
# result = find_key_values(data, "a", max_count=2)
# print(result) # 输出: [1, 2]
# 测试3
# data = {"a": 1, "b": {"a": 2, "c": {"a": 3}}}
# result = find_key_values(data, "a")
# print(result) # 输出: [1, 2, 3]
# 测试4
# json_str = '{"a": 1, "b": {"a": 2}}'
# result = find_key_values(json_str, "a", return_first=True)
# print(result) # 输出: 1

浙公网安备 33010602011771号