YOLO26 识别验证码

最近 YOLO26 出来了，我还没使用过 YOLO，就用它来做一个测试。注意：直接用 OCR 识别验证码更加方便，用 YOLO 并不太合适，我只是为了做测试！

先看效果！

codes.yaml 文件如下：

# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license

# Captcha character dataset (digits 0-9 and the symbols + - * / = ?); this file was adapted from the Ultralytics COCO8 template
# Documentation: https://docs.ultralytics.com/datasets/detect/coco8/
# Example usage: yolo train data=codes.yaml
# parent
# ├── ultralytics
# └── datasets
#     └── coco8 ← downloads here (1 MB)

# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
path: D:\learn\yolo\dataset\yolo_dataset_2026-04-28 # dataset root dir
train: images/train # train images (relative to 'path') 4 images
val: images/val # val images (relative to 'path') 4 images
test: # test images (optional)

# Classes
names:
  0: "0"
  1: "1"
  2: "2"
  3: "3"
  4: "4"
  5: "5"
  6: "6"
  7: "7"
  8: "8"
  9: "9"
  10: "+"
  11: "-"
  12: "*"
  13: "/"
  14: "="
  15: "?"

yolo cli 命令如下

#训练，由于只是测试，所以没配置其他训练参数
yolo detect train data="D:\learn\yolo\ultralytics\ultralytics\cfg\datasets\codes.yaml" model=yolo26n.pt epochs=100 imgsz=640 batch=0.5 workers=1

#验证
yolo detect val model=codes.pt data="D:\learn\yolo\ultralytics\ultralytics\cfg\datasets\codes.yaml" nc=16

#预测
yolo predict model=codes.pt source="D:\learn\yolo\ultralytics\ultralytics\test\codes_processed.png"

#导出：为了配合 Java 项目 https://gitee.com/agricultureiot/yolo-onnx-java 使用，需要导出成 ONNX；该项目只支持 opset 19 以下的版本

yolo export model=codes.pt format=onnx opset=18

python 脚本如下：

import time
import requests
from ultralytics import YOLO
from PIL import Image
import numpy as np

# Load the YOLO weights stored one directory above the working directory.
model = YOLO("../codes2.pt")

# Use the current time in milliseconds as the random string sent with the request.
random_str = str(int(time.time() * 1000))

# Captcha image endpoint of the Java pig4cloud admin backend.
url = f"https://www.xxxx.cn/auth/code/image?randomStr={random_str}"

print("正在下载验证码图片...")
print(f"请求URL: {url}")

try:
    response = requests.get(url, timeout=10)
    response.raise_for_status()

    # Save the image as codes.png in the current working directory.
    with open("codes.png", "wb") as f:
        f.write(response.content)
except (requests.RequestException, OSError) as e:
    # Network/HTTP failures and file-write failures are the expected errors here.
    print(f"下载验证码图片失败: {e}")
    # The bare `exit()` helper is injected by the `site` module for interactive
    # use and is not guaranteed to exist; raising SystemExit always works.
    raise SystemExit(1)
else:
    print("验证码图片下载成功: codes.png")

# Run inference on the downloaded captcha image.
results = model("codes.png")

# Process the prediction results.
for result in results:
    # Detection boxes of this result.
    boxes = result.boxes

    # Ultralytics detection results carry a boxes container even when nothing
    # was found, so the length is checked as well; with only the `is not None`
    # test the "no target detected" branch could never run for an empty result.
    if boxes is not None and len(boxes) > 0:
        # Class IDs, confidences and xyxy coordinates (x1, y1, x2, y2),
        # moved to the CPU and converted to NumPy arrays.
        class_ids = boxes.cls.cpu().numpy()
        confidences = boxes.conf.cpu().numpy()
        box_coords = boxes.xyxy.cpu().numpy()

        # Lookup from class ID to class name.
        names = result.names

        # Build one record per detection holding its position and class info.
        detections = []
        for i, (class_id, confidence, box) in enumerate(zip(class_ids, confidences, box_coords)):
            x1, y1, x2, y2 = box
            center_x = (x1 + x2) / 2  # horizontal centre, used for ordering
            detections.append({
                'index': i,
                'class_id': int(class_id),
                'confidence': confidence,
                'class_name': names[int(class_id)],
                'box': box,
                'center_x': center_x,
                'x1': x1
            })

        # Order the detections left to right by their horizontal centre so the
        # characters come out in reading order.
        detections.sort(key=lambda d: d['center_x'])

        # Print the header for the sorted detections.
        print(f"检测到 {len(detections)} 个目标（已按从左到右排序）:")
        print("-" * 50)

        def calculate_overlap_ratio(box1, box2):
            """Return the overlap of two boxes as a fraction of the smaller box's area.

            Both boxes are ``(x1, y1, x2, y2)`` sequences. The result is ``0.0``
            when the boxes do not intersect or when the smaller box has zero area.
            """
            left_a, top_a, right_a, bottom_a = box1
            left_b, top_b, right_b, bottom_b = box2

            # Extent of the intersection rectangle along each axis; a negative
            # value means the boxes are separated along that axis.
            overlap_w = min(right_a, right_b) - max(left_a, left_b)
            overlap_h = min(bottom_a, bottom_b) - max(top_a, top_b)
            if overlap_w < 0 or overlap_h < 0:
                return 0.0

            # The smaller of the two box areas is the reference for the ratio.
            smaller_area = min(
                (right_a - left_a) * (bottom_a - top_a),
                (right_b - left_b) * (bottom_b - top_b),
            )
            if smaller_area == 0:
                return 0.0

            return (overlap_w * overlap_h) / smaller_area


        def filter_overlapping_detections(detections, overlap_threshold=0.5):
            """Return the indices of the detections that survive duplicate removal.

            Detections are visited in list order. When a later detection has the
            same ``class_id`` as a kept one and their overlap ratio is above
            ``overlap_threshold``, the later one is dropped and a message is
            printed. Only the earlier detection of such a pair is kept.
            """
            kept = []
            suppressed = set()
            total = len(detections)

            for anchor, anchor_det in enumerate(detections):
                # Already discarded as a duplicate of an earlier detection.
                if anchor in suppressed:
                    continue

                kept.append(anchor)

                # Compare the kept detection against everything after it.
                for later in range(anchor + 1, total):
                    candidate = detections[later]
                    # Skip detections already discarded or of a different class.
                    if later in suppressed or candidate['class_id'] != anchor_det['class_id']:
                        continue

                    ratio = calculate_overlap_ratio(anchor_det['box'], candidate['box'])

                    # Too much overlap with the kept detection: drop the later one.
                    if ratio > overlap_threshold:
                        suppressed.add(later)
                        print(f"  [过滤] 目标{later + 1}与目标{anchor + 1}重叠{ratio:.2%}，已跳过")

            return kept


        # Remove duplicate detections, then look up the surviving records.
        valid_indices = filter_overlapping_detections(detections, overlap_threshold=0.5)
        kept = [detections[pos] for pos in valid_indices]

        # Print one report per kept detection, numbered from 1.
        for number, det in enumerate(kept, start=1):
            left, top, right, bottom = det['box']
            report = [
                f"目标 {number}:",
                f"  分类ID: {det['class_id']}",
                f"  分类名称: {det['class_name']}",
                f"  置信度: {det['confidence']:.4f}",
                f"  边界框: x1={left:.1f}, y1={top:.1f}, x2={right:.1f}, y2={bottom:.1f}",
                f"  中心点x坐标: {det['center_x']:.1f}",
                "-" * 50,
            ]
            print("\n".join(report))

        # Class names of the kept detections in order, each preceded by two spaces.
        allClasName = "".join(f"  {det['class_name']}" for det in kept)

        print(allClasName)
    else:
        print("未检测到任何目标")

　　不要问我这段 Python 代码是什么意思，我也不懂，是 AI 给我生成的！！！

简述一下遇到的问题：

1.训练图片数量比较少，准确率低

2.标注错了好多图片，导致准确率非常低

3.由于数字是空心的，部分数字会被重复识别出来，例如 3+2=？的图片会识别成 3 3 + 2 = ？。这种情况下两个结果的 class_id 相同，且检测框重叠超过 50%，我就丢弃后面重复的结果，最终得到 3 + 2 = ？

posted @ 2026-04-28 16:10 hujunmin 阅读(29) 评论(0) 收藏举报

刷新页面返回顶部

YOLO26 识别验证码

公告