使用Playwright识别腾讯云滑动验证码

最近在项目中遇到了腾讯云滑动验证码的问题，需要对其进行识别。在研究了一些代码库并对比了人工识别的过程之后，最终选择使用Playwright库来完成验证码的识别。

什么是腾讯云滑动验证码？

在腾讯云的一些业务场景中，需要进行滑动验证码验证来防止爬虫等非法操作。这种滑动验证码会弹出一个交互式窗口，要求用户依据提示完成验证。

识别思路

我们知道，验证码识别的常见套路是将验证码图片进行处理，得到其中的数字或者字母信息。但是对于腾讯云这种带有交互性的滑动验证码，直接获取其图片内容难度太大。因此，我们需要考虑其他的识别方法。

腾讯云滑动验证码的交互式窗口中有一张带缺口的背景图，用户需要把拼图滑块拖动到缺口位置（对应提示语“请控制拼图对齐缺口”）。因此，只要拿到这张背景图并计算出缺口距离，就可以模拟拖动来完成验证码的识别。

接下来我们就来看看如何使用Playwright来进行腾讯云滑动验证码的识别。

`handle_captcha`

该函数是网络请求拦截函数，用于判断请求是否是验证码图片请求。如果是验证码图片请求，则将其下载到指定位置。

def handle_captcha(route: Route) -> None:
    """Intercept a captcha request and save the captcha image to disk.

    The request is performed once with ``route.fetch()`` and that same
    response is handed back to the page. When the response succeeded and its
    URL contains ``index=1``, the body is written to ``img`` and the
    module-level ``is_reflashed_img`` flag is raised.

    Args:
        route: The Playwright route being handled.
    """
    # Without this declaration the assignment below would only create a local
    # variable and the module-level flag would never change.
    global is_reflashed_img

    response = route.fetch()
    # Only responses matching the captcha image URL rule are stored.
    if response.status == 200 and "index=1" in response.url:
        with open(img, "wb") as f:
            f.write(response.body())
        is_reflashed_img = True
    # Fulfil with the response already fetched. continue_() would send the
    # request to the network a second time, so the image saved above might not
    # be the one the page ends up displaying.
    route.fulfill(response=response)

`dragbox_location`

该函数返回滑块的位置信息（bounding box）。它最多尝试 5 次，只有当滑块的 x 坐标大于 20 时才返回该位置，否则返回 None。

def dragbox_location():
    """Return the bounding box of the captcha slider thumb.

    The thumb is looked up inside the ``#tcaptcha_iframe`` frame. A box is
    accepted only when its ``x`` coordinate is greater than 20; up to five
    attempts are made, with a short pause between them.

    Returns:
        The bounding-box dict of the thumb, or None if no attempt produced an
        acceptable box.
    """
    thumb = page.frame_locator("#tcaptcha_iframe").locator(
        "#tcaptcha_drag_thumb")
    for attempt in range(5):
        if attempt:
            # Pause between attempts; retrying back to back gives the widget
            # no time to change position.
            page.wait_for_timeout(200)
        dragbox_bounding = thumb.bounding_box()
        if dragbox_bounding is not None and dragbox_bounding["x"] > 20:
            return dragbox_bounding
    return None

`drag_to_breach`

该函数用于模拟用户拖动滑块的过程，传入的参数是拖动的距离序列。

def drag_to_breach(move_distance):
    """Drag the captcha slider along a track and report the outcome.

    Args:
        move_distance: Iterable of horizontal increments; each one is added to
            the running x position and replayed as one mouse move.

    Returns:
        True if one of the result texts is found on the page after the drag,
        False if the slider could not be located or no result text appeared.
    """
    print('开始拖动滑块..')
    drag_box = dragbox_location()
    if drag_box is None:
        print('未获取到滑块位置,识别失败')
        return False

    # Press the mouse button at the centre of the slider thumb.
    page.mouse.move(drag_box["x"] + drag_box["width"] / 2,
                    drag_box["y"] + drag_box["height"] / 2)
    page.mouse.down()
    # NOTE(review): the track is replayed from the thumb's top-left corner even
    # though the button was pressed at its centre. Left unchanged because the
    # distance scaling in the caller may depend on it - confirm.
    location_x = drag_box["x"]
    for step in move_distance:
        location_x += step
        page.mouse.move(location_x, drag_box["y"])
    page.mouse.up()

    # get_by_text() always returns a Locator, never None, so an
    # ``is not None`` test is always true. Count actual matches instead,
    # polling briefly so the page has time to react to the drag.
    # NOTE(review): the second text reads like a "realign the piece" hint;
    # confirm that it should count as success.
    result_texts = ("后重试", "请控制拼图对齐缺口")
    for _ in range(10):
        page.wait_for_timeout(200)
        if any(page.get_by_text(text).count() > 0 for text in result_texts):
            print("识别成功")
            return True
    print('识别失败')
    return False

`calc_distance`

该函数用于计算滑块的缺口距离。

def calc_distance():
    """Locate the gap in the saved captcha image.

    Runs ``qq_mark_pos`` on ``img`` up to ``retryTimes`` times, pausing
    between rounds so that an image still being downloaded can reach the disk.

    Returns:
        The first positive x value reported by ``qq_mark_pos``, or None if no
        round produced one.
    """
    for i in range(retryTimes):
        print(f"识别验证码距离中，当前等待轮数{i + 1}/{retryTimes}")
        try:
            res = qq_mark_pos(img)
            distance = res.x.values[0]
            if distance > 0:
                print(f"获取到缺口距离：{distance}")
                return distance
        except Exception as e:
            # Best effort: any failure of the recognition helper just means
            # this round is retried.
            print(f"识别错误, 异常：{e}")
        if i + 1 < retryTimes:
            # Wait through Playwright rather than time.sleep() so the sync API
            # keeps dispatching events (such as route handlers) meanwhile.
            page.wait_for_timeout(500)
    # Explicit result for "all rounds failed"; callers must check for None.
    return None

该函数使用OpenCV对验证码图片进行识别，获取滑块的缺口距离。如果识别失败，将会重新尝试。

识别流程

import io
import time
from playwright.sync_api import sync_playwright, Route
from CaptchaCv2 import (get_track_list, qq_mark_pos)

# Gap distance; reassigned by the main loop from calc_distance().
distance = 0
# Flag the main loop checks to decide whether the drag distance must be
# recomputed; the loop resets it to False after each recomputation.
is_reflashed_img = False
# Path where handle_captcha() stores the captcha image and from which
# calc_distance() reads it.
img = "bg.png"
# Maximum number of rounds for both calc_distance() and the main drag loop.
retryTimes = 10


def handle_captcha(route: Route) -> None:
    """Intercept a captcha request and save the captcha image to disk.

    The request is performed once with ``route.fetch()`` and that same
    response is handed back to the page. When the response succeeded and its
    URL contains ``index=1``, the body is written to ``img`` and the
    module-level ``is_reflashed_img`` flag is raised.

    Args:
        route: The Playwright route being handled.
    """
    # Without this declaration the assignment below would only create a local
    # variable and the module-level flag would never change.
    global is_reflashed_img

    response = route.fetch()
    # Only responses matching the captcha image URL rule are stored.
    if response.status == 200 and "index=1" in response.url:
        with open(img, "wb") as f:
            f.write(response.body())
        is_reflashed_img = True
    # Fulfil with the response already fetched. continue_() would send the
    # request to the network a second time, so the image saved above might not
    # be the one the page ends up displaying.
    route.fulfill(response=response)


def dragbox_location():
    """Return the bounding box of the captcha slider thumb.

    The thumb is looked up inside the ``#tcaptcha_iframe`` frame. A box is
    accepted only when its ``x`` coordinate is greater than 20; up to five
    attempts are made, with a short pause between them.

    Returns:
        The bounding-box dict of the thumb, or None if no attempt produced an
        acceptable box.
    """
    thumb = page.frame_locator("#tcaptcha_iframe").locator(
        "#tcaptcha_drag_thumb")
    for attempt in range(5):
        if attempt:
            # Pause between attempts; retrying back to back gives the widget
            # no time to change position.
            page.wait_for_timeout(200)
        dragbox_bounding = thumb.bounding_box()
        if dragbox_bounding is not None and dragbox_bounding["x"] > 20:
            return dragbox_bounding
    return None


def drag_to_breach(move_distance):
    """Drag the captcha slider along a track and report the outcome.

    Args:
        move_distance: Iterable of horizontal increments; each one is added to
            the running x position and replayed as one mouse move.

    Returns:
        True if one of the result texts is found on the page after the drag,
        False if the slider could not be located or no result text appeared.
    """
    print('开始拖动滑块..')
    drag_box = dragbox_location()
    if drag_box is None:
        print('未获取到滑块位置,识别失败')
        return False

    # Press the mouse button at the centre of the slider thumb.
    page.mouse.move(drag_box["x"] + drag_box["width"] / 2,
                    drag_box["y"] + drag_box["height"] / 2)
    page.mouse.down()
    # NOTE(review): the track is replayed from the thumb's top-left corner even
    # though the button was pressed at its centre. Left unchanged because the
    # distance scaling in the caller may depend on it - confirm.
    location_x = drag_box["x"]
    for step in move_distance:
        location_x += step
        page.mouse.move(location_x, drag_box["y"])
    page.mouse.up()

    # get_by_text() always returns a Locator, never None, so an
    # ``is not None`` test is always true. Count actual matches instead,
    # polling briefly so the page has time to react to the drag.
    # NOTE(review): the second text reads like a "realign the piece" hint;
    # confirm that it should count as success.
    result_texts = ("后重试", "请控制拼图对齐缺口")
    for _ in range(10):
        page.wait_for_timeout(200)
        if any(page.get_by_text(text).count() > 0 for text in result_texts):
            print("识别成功")
            return True
    print('识别失败')
    return False


def calc_distance():
    """Locate the gap in the saved captcha image.

    Runs ``qq_mark_pos`` on ``img`` up to ``retryTimes`` times, pausing
    between rounds so that an image still being downloaded can reach the disk.

    Returns:
        The first positive x value reported by ``qq_mark_pos``, or None if no
        round produced one.
    """
    for i in range(retryTimes):
        print(f"识别验证码距离中，当前等待轮数{i + 1}/{retryTimes}")
        try:
            res = qq_mark_pos(img)
            distance = res.x.values[0]
            if distance > 0:
                print(f"获取到缺口距离：{distance}")
                return distance
        except Exception as e:
            # Best effort: any failure of the recognition helper just means
            # this round is retried.
            print(f"识别错误, 异常：{e}")
        if i + 1 < retryTimes:
            # Wait through Playwright rather than time.sleep() so the sync API
            # keeps dispatching events (such as route handlers) meanwhile.
            page.wait_for_timeout(500)
    # Explicit result for "all rounds failed"; callers must check for None.
    return None


# Script entry: open the login page in an emulated iPhone 12, trigger the
# slider captcha and try to solve it, recording a video of the session.
with sync_playwright() as p:
    # A visible (non-headless) Edge window makes the drag easy to watch.
    browser = p.chromium.launch(channel="msedge", headless=False)
    iphone_12 = p.devices["iPhone 12"]
    context = browser.new_context(
        record_video_dir="videos/",
        **iphone_12,
    )
    try:
        page = context.new_page()
        # Route the captcha requests through handle_captcha so the captcha
        # image is saved to disk.
        page.route("**/t.captcha.qq.com/hycdn**", handle_captcha)
        page.route(
            "**/t.captcha.qq.com/cap_union_new_getcapbysig**", handle_captcha)
        page.goto(
            "https://wap.showstart.com/pages/passport/login/login?redirect=%252Fpages%252FmyHome%252FmyHome")

        page.get_by_role("spinbutton").fill("14445104596")
        page.get_by_text("获取验证码").click()

        frame = page.wait_for_selector("#tcaptcha_iframe")
        print(frame.bounding_box())
        move_distance = None
        for i in range(retryTimes):
            print(f"滑块拖动逻辑开始，当前尝试轮数{i + 1}/{retryTimes}")

            # Recompute the distance when the captcha image was refreshed or
            # no track has been built yet.
            if is_reflashed_img or move_distance is None:
                distance = calc_distance()
                if distance is None:
                    # calc_distance() found no gap; the scaling below would
                    # raise TypeError on None, so stop instead.
                    print("未获取到缺口距离,识别失败")
                    break
                page.wait_for_timeout(200)

                # presumably rendered width / source image width - TODO confirm
                true_distance = distance * 353 / 680
                move_distance = get_track_list(true_distance)
                print(f"获取到相对滑动距离{true_distance}, 模拟拖动列表{move_distance}")
                is_reflashed_img = False

            drag_result = drag_to_breach(move_distance)
            if drag_result:
                break

        page.wait_for_timeout(3000)
        print("识别结束，退出程序")
        # For debugging, an input() call here keeps the browser open.
    finally:
        # Close the context explicitly so the recorded video is written out,
        # then close the browser, even if an error occurred above.
        context.close()
        browser.close()

运行结果

运行上述代码后，我们可以在控制台中看到整个识别的过程。并且我们可以在指定目录中找到视频文件，观察整个识别过程的效果。

相比一般的验证码识别，使用Playwright识别腾讯云滑动验证码要更加复杂一些，但对于一些交互性强的验证码，这可能是更好的解决方案。当然，我们也可以结合其他的验证码识别库一同使用，以提高识别的准确性。

完整代码已上传到我的 GitHub。

posted @ 2023-04-06 10:52 阿宇和阿屿阅读(671) 评论(0) 收藏举报

刷新页面返回顶部

持续优化系统