第22章 Day26-Day27 DrissionPage之实战项目

案例一

页面:

https://fuwu.nhsa.gov.cn/nationalHallSt/#/search/disease-diagnosis?code=90000&flag=false&gbFlag=true

逆向接口:

https://fuwu.nhsa.gov.cn/ebus/fuwu/api/nthl/api/CommQuery/queryRtalPhacBInfo

案例二

页面:

https://app.diandian.com/

逆向接口:

https://api.diandian.com/pc/app/v1/user/favorite/app

案例三

页面:

https://www.cnhnb.com/

逆向接口:

https://appapi.cnhnb.com/recq/api/transform/supply/v501/index
点击查看小红书笔记.py代码
import time

from DrissionPage import ChromiumPage
from DataRecorder import Recorder

# Module-level recorder shared by the scraping functions below; rows passed to
# recorder.add_data() are written to "data.xlsx".
recorder = Recorder("data.xlsx")
# NOTE(review): presumably turns off the recorder's console messages on each
# write — confirm against the DataRecorder documentation.
recorder.set.show_msg(False)


def find_first_key_value(data, target_key):
    """Return the first value stored under ``target_key`` in nested data.

    The structure is walked depth-first in iteration order, descending into
    dicts and lists only. A match whose value is ``None`` is returned as-is
    at the level where it is found, which callers further up cannot tell
    apart from "not found", so they carry on with their remaining siblings.

    :param data: Arbitrarily nested dicts/lists (any other type yields None).
    :param target_key: Dictionary key to look for.
    :return: The first matching value, or ``None`` when nothing matches.
    """
    # Lists: take the first non-None hit among the elements, lazily.
    if isinstance(data, list):
        hits = (find_first_key_value(element, target_key) for element in data)
        return next((hit for hit in hits if hit is not None), None)

    # Scalars and other non-container types cannot hold the key.
    if not isinstance(data, dict):
        return None

    # Dicts: check each key before descending into its value, so an earlier
    # nested match wins over a later direct match.
    for name, child in data.items():
        if name == target_key:
            return child
        hit = find_first_key_value(child, target_key)
        if hit is not None:
            return hit

    return None


def _record_note(page, card):
    """Open one result card, record its detail-API payload, then close it."""
    # Print the card being processed.
    print(card)
    # Click the card's image via JavaScript to open the note detail.
    card.ele('@tag()=img').click(by_js=True)

    # Wait for the detail request; with fit_count=True a timeout yields a
    # falsy result instead of a packet, so it must be checked before use.
    res = page.listen.wait(count=1, timeout=1, fit_count=True)
    if res:
        data = res.response.body
        print("data:::", data)

        # Pull the wanted fields out of the nested payload and write one row.
        row = {
            "博主昵称": find_first_key_value(data, "nickname"),
            "标题": find_first_key_value(data, "title"),
            "详情": find_first_key_value(data, "desc"),
            "评论数": find_first_key_value(data, "comment_count"),
            "点赞数": find_first_key_value(data, "liked_count"),
        }
        recorder.add_data(row)
        recorder.record()
    else:
        print("error:::", "no detail response captured for this card")

    # Close the detail view even when nothing was captured, then pause
    # before moving on to the next card.
    page.ele('@class=close close-mask-dark').click()
    time.sleep(3)


def handler(page, keyword):
    """Search Xiaohongshu for ``keyword`` and record every result card.

    The search page is scrolled 50 times; on each pass every card not seen
    before (tracked by its ``data-index`` attribute) is opened, the
    ``web/v1/feed`` response is captured, and one row is written through the
    module-level ``recorder``. A failure on one card is printed and does not
    stop the remaining cards of that pass.

    :param page: Browser page object (``main`` passes a ``ChromiumPage``).
    :param keyword: Search term; surrounding whitespace is ignored and a
        blank keyword is skipped.
    """
    from urllib.parse import quote

    # Keywords read from a text file keep their trailing newline, and raw
    # characters such as "&" or "#" would corrupt the query string.
    keyword = keyword.strip()
    if not keyword:
        return

    page.get(f"https://www.xiaohongshu.com/search_result?keyword={quote(keyword)}&source=unknown&type=51")
    time.sleep(3)

    seen_indexes = set()
    for _ in range(50):
        try:
            cards = page.eles('@class=note-item')
            # Listen for the card-detail API before clicking anything.
            page.listen.start("web/v1/feed")
            for card in cards:
                # Isolate each card so one failure (stale element, missing
                # close button, ...) does not drop the rest of the pass.
                try:
                    index = card.attr("data-index")
                    if index in seen_indexes:
                        continue
                    seen_indexes.add(index)
                    _record_note(page, card)
                except Exception as e:
                    print("error:::", e)

        except Exception as e:
            print("error:::", e)

        finally:
            # Nudge up, then jump to the bottom, to load the next batch.
            page.scroll.up(100)
            time.sleep(1)
            page.scroll.to_bottom()
            time.sleep(1)


def main():
    """Read search keywords from ``关键词.txt`` and scrape each one.

    Opens a browser on the Xiaohongshu explore page, blocks on ``input()``
    so the user can log in manually, then calls ``handler`` once per keyword.
    """
    with open("关键词.txt", mode="r", encoding="utf8") as f:
        # Strip the newline kept on every line and drop blank lines, so no
        # blank or whitespace-padded keyword is searched.
        stripped_lines = (line.strip() for line in f)
        keyword_list = [keyword for keyword in stripped_lines if keyword]

    # Create the browser driver object.
    page = ChromiumPage()
    page.get("https://www.xiaohongshu.com/explore")

    # Block until the user confirms the manual login is done.
    input("等待登录")

    for keyword in keyword_list:
        handler(page, keyword)


# Only run the scraper when executed as a script, not when imported.
if __name__ == "__main__":
    main()

点击查看JD.py代码
import json
import time

from DrissionPage import ChromiumPage
from DataRecorder import Recorder

# Module-level recorder used by main(); rows passed to recorder.add_data()
# are written to "JD.xlsx".
recorder = Recorder("JD.xlsx")
# NOTE(review): presumably turns off the recorder's console messages on each
# write — confirm against the DataRecorder documentation.
recorder.set.show_msg(False)


def find_key_val(data, target_key, max_count=1):
    """Collect up to ``max_count`` values stored under ``target_key``.

    The data is walked depth-first in iteration order through dicts and
    lists. A matching value is collected and then searched as well, so
    nested occurrences of the key are also found.

    :param data: Nested dicts/lists, or a JSON string that is decoded first.
    :param target_key: Dictionary key to look for.
    :param max_count: Maximum number of values to collect; ``None`` collects
        every match and a value of 0 or less collects nothing.
    :return: List of matching values in discovery order; an empty list when
        nothing matches or when ``data`` is a string that is not valid JSON.
    """
    results = []

    # A JSON string is deserialized before searching.
    if isinstance(data, str):
        try:
            data = json.loads(data)
        except json.JSONDecodeError:
            return results

    def _limit_reached():
        return max_count is not None and len(results) >= max_count

    def _search(node):
        if _limit_reached():
            return
        if isinstance(node, dict):
            for key, val in node.items():
                if key == target_key:
                    results.append(val)
                # Descend into the value (a no-op once the quota is full).
                _search(val)
                # Re-check after every child: a nested call may have filled
                # the quota, and later keys must not push past max_count.
                if _limit_reached():
                    return
        elif isinstance(node, list):
            for item in node:
                _search(item)
                if _limit_reached():
                    return

    _search(data)

    return results


def main():
    """Scrape the reviews of one JD product page into the recorder.

    Opens the product page, clicks the "全部评价" tab, then repeatedly waits
    for a ``client.action`` response, records every ``commentInfo`` entry
    found in it, and scrolls the review list to trigger the next request.
    The loop ends when no response arrives within the wait timeout.
    """
    page = ChromiumPage()
    # Start listening before navigation so no matching request is missed.
    page.listen.start("client.action")
    url = "https://item.jd.com/100006466663.html"
    page.get(url)

    # Look the tab up once and reuse the element.
    all_reviews_tab = page.ele('@text()=全部评价')
    if not all_reviews_tab:
        print("没有等待到元素!")
        return
    all_reviews_tab.click(by_js=True)

    while True:
        # Wait for one packet; a falsy result means the wait timed out.
        res = page.listen.wait(1, 10, fit_count=True)
        if not res:
            break

        data = res.response.body
        print("data:::", data)

        for comment_info in find_key_val(data, "commentInfo", 11):
            # Skip values that are not dicts; .get() would fail on them.
            if not isinstance(comment_info, dict):
                continue
            row = {
                "用户名": comment_info.get("userNickName"),
                "评论时间": comment_info.get("commentDate"),
                "评论内容": comment_info.get("commentData"),
                "评分": comment_info.get("commentScore"),
            }
            recorder.add_data(row)
        # Flush once per response instead of once per row.
        recorder.record()

        # Scroll the review list to the bottom, then pause between requests
        # to reduce the risk of the account being banned.
        page.ele('@class=_rateListContainer_1ygkr_45').scroll.to_bottom()
        time.sleep(3)


# Only run the scraper when executed as a script, not when imported.
if __name__ == "__main__":
    main()

点击查看tools.py代码
import json
from typing import Any, List, Optional, Union


def find_key_values(
        data: Union[dict, list, str],
        target_key: str,
        max_count: Optional[int] = None,
        return_first: bool = False
) -> Union[List[Any], Any, None]:
    """
    Recursively look up the values stored under a key in JSON-like data.

    The data is walked depth-first in iteration order through dicts and
    lists. A matching value is collected and then searched as well, so
    nested occurrences of the key are also found.

    :param data: JSON data (dict/list), or a JSON string that is parsed first.
    :param target_key: The key to look for.
    :param max_count: Maximum number of values to return. ``None`` means all
        matches; 0 or a negative number means none.
    :param return_first: Return only the first match (takes precedence over
        ``max_count``).
    :return: With ``return_first=True``, the first matching value or ``None``;
        otherwise a list of matching values. A string that is not valid JSON
        yields ``None`` / ``[]`` respectively.
    """
    # String input is parsed as JSON automatically.
    if isinstance(data, str):
        try:
            data = json.loads(data)
        except json.JSONDecodeError:
            return None if return_first else []

    # return_first is just a limit of one that overrides max_count.
    limit = 1 if return_first else max_count

    results: List[Any] = []

    # Compare against None explicitly: a plain truthiness test would treat
    # max_count=0 as "no limit" and return every match.
    if limit is not None and limit <= 0:
        return results

    def _search(current_data: Any) -> bool:
        """Search recursively; return True once the limit has been reached."""
        if isinstance(current_data, dict):
            for key, value in current_data.items():
                if key == target_key:
                    results.append(value)
                    if limit is not None and len(results) >= limit:
                        return True
                if _search(value):  # descend into the value
                    return True

        elif isinstance(current_data, list):
            for item in current_data:
                if _search(item):  # descend into each list item
                    return True
        return False

    _search(data)

    if return_first:
        return results[0] if results else None
    return results


#  测试1
# data = {"a": 1,
#         "b":{
#             "a": 2}
#         }
# result = find_key_values(data, "a", return_first=True)
# print(result)  # 输出: 1
#  测试2
# data = {"a": 1, "b": {"a": 2}}
# result = find_key_values(data, "a", max_count=2)
# print(result)  # 输出: [1, 2]

#  测试3
# data = {"a": 1, "b": {"a": 2, "c": {"a": 3}}}
# result = find_key_values(data, "a")
# print(result)  # 输出: [1, 2, 3]
#  测试4
# json_str = '{"a": 1, "b": {"a": 2}}'
# result = find_key_values(json_str, "a", return_first=True)
# print(result)  # 输出: 1

posted @ 2025-09-25 16:59  凫弥  阅读(81)  评论(0)    收藏  举报