YOLO26 识别验证码

最近 YOLO26 出来了,我还没使用过 YOLO,就用它来做一个测试。注意:直接用 OCR 识别验证码更加方便,用 YOLO 并不太合适,我只是为了做测试!

先看效果!

image

 

 

codes.yaml 文件如下:

# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license

# Captcha character detection dataset config (adapted from the Ultralytics coco8.yaml template)
# Example usage: yolo detect train data=codes.yaml
# Image folders referenced below, relative to 'path':
# ├── images/train
# └── images/val

# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
path: D:\learn\yolo\dataset\yolo_dataset_2026-04-28 # dataset root dir
train: images/train # train images (relative to 'path')
val: images/val # val images (relative to 'path')
test: # test images (optional)

# Classes: digits 0-9, the four arithmetic operators, "=" and "?" (16 classes in total)
names:
  0: "0"
  1: "1"
  2: "2"
  3: "3"
  4: "4"
  5: "5"
  6: "6"
  7: "7"
  8: "8"
  9: "9"
  10: "+"
  11: "-"
  12: "*"
  13: "/"
  14: "="
  15: "?"

  

yolo cli 命令如下

# Train (this is only an experiment, so no other training parameters are tuned)
yolo detect train data="D:\learn\yolo\ultralytics\ultralytics\cfg\datasets\codes.yaml" model=yolo26n.pt epochs=100 imgsz=640 batch=0.5 workers=1

# Validate
yolo detect val model=codes.pt data="D:\learn\yolo\ultralytics\ultralytics\cfg\datasets\codes.yaml" nc=16

# Predict
yolo predict model=codes.pt source="D:\learn\yolo\ultralytics\ultralytics\test\codes_processed.png"

# Export to ONNX for the Java project https://gitee.com/agricultureiot/yolo-onnx-java, which only supports opsets below 19
yolo export model=codes.pt format=onnx opset=18

 

python 脚本如下:

import time
import requests
from ultralytics import YOLO
from PIL import Image
import numpy as np

# Load a pretrained YOLO26n model
model = YOLO("../codes2.pt")

# 生成随机码并下载验证码图片
random_str = str(int(time.time() * 1000))

def calculate_overlap_ratio(box1, box2):
    """Return the overlap of two boxes relative to the smaller box's area.

    Args:
        box1: First box as ``(x1, y1, x2, y2)``.
        box2: Second box as ``(x1, y1, x2, y2)``.

    Returns:
        Intersection area divided by the area of the smaller box, or ``0.0``
        when the boxes do not intersect or the smaller box has zero area.
    """
    x1_1, y1_1, x2_1, y2_1 = box1
    x1_2, y1_2, x2_2, y2_2 = box2

    # Corners of the intersection rectangle.
    inter_x1 = max(x1_1, x1_2)
    inter_y1 = max(y1_1, y1_2)
    inter_x2 = min(x2_1, x2_2)
    inter_y2 = min(y2_1, y2_2)

    # An inverted rectangle means the boxes are disjoint.
    if inter_x2 < inter_x1 or inter_y2 < inter_y1:
        return 0.0

    inter_area = (inter_x2 - inter_x1) * (inter_y2 - inter_y1)
    area1 = (x2_1 - x1_1) * (y2_1 - y1_1)
    area2 = (x2_2 - x1_2) * (y2_2 - y1_2)

    # Normalising by the smaller box makes a box nested inside a larger one
    # count as fully overlapping.
    min_area = min(area1, area2)
    if min_area == 0:
        return 0.0
    return inter_area / min_area


def filter_overlapping_detections(detections, overlap_threshold=0.5):
    """Drop duplicate detections of the same class that overlap heavily.

    For every pair with the same ``class_id`` whose overlap ratio exceeds
    ``overlap_threshold``, only the earlier entry of ``detections`` is kept.

    Args:
        detections: List of dicts with at least ``'class_id'`` and ``'box'``.
        overlap_threshold: Overlap ratio above which the later detection is
            treated as a duplicate.

    Returns:
        Indices into ``detections`` of the entries that were kept, in order.
    """
    filtered = []
    used_indices = set()

    for i, current in enumerate(detections):
        if i in used_indices:
            continue
        filtered.append(i)

        # Only later entries can be duplicates of the current one.
        for j in range(i + 1, len(detections)):
            if j in used_indices:
                continue
            candidate = detections[j]
            if current['class_id'] != candidate['class_id']:
                continue
            overlap_ratio = calculate_overlap_ratio(current['box'], candidate['box'])
            if overlap_ratio > overlap_threshold:
                used_indices.add(j)
                print(f" [过滤] 目标{j + 1}与目标{i + 1}重叠{overlap_ratio:.2%},已跳过")

    return filtered


def _download_captcha(url, path="codes.png"):
    """Download the captcha image at ``url`` to ``path``; exit with status 1 on failure."""
    print("正在下载验证码图片...")
    print(f"请求URL: {url}")
    try:
        response = requests.get(url, timeout=10)
        response.raise_for_status()
        # The image is written relative to the current working directory.
        with open(path, "wb") as f:
            f.write(response.content)
        print(f"验证码图片下载成功: {path}")
    except (requests.RequestException, OSError) as e:
        print(f"下载验证码图片失败: {e}")
        # SystemExit is raised directly because the ``exit`` builtin is only
        # injected by the ``site`` module and is not guaranteed to exist.
        raise SystemExit(1)


def _collect_detections(result):
    """Convert one prediction result into a list of dicts sorted left to right."""
    boxes = result.boxes
    if boxes is None:
        return []

    class_ids = boxes.cls.cpu().numpy()
    confidences = boxes.conf.cpu().numpy()
    # Box coordinates in xyxy format: x1, y1, x2, y2.
    box_coords = boxes.xyxy.cpu().numpy()
    names = result.names

    detections = []
    for i, (class_id, confidence, box) in enumerate(zip(class_ids, confidences, box_coords)):
        x1, _, x2, _ = box
        detections.append({
            'index': i,
            'class_id': int(class_id),
            'confidence': confidence,
            'class_name': names[int(class_id)],
            'box': box,
            'center_x': (x1 + x2) / 2,
            'x1': x1,
        })

    # Captcha characters are read left to right, so order by horizontal centre.
    detections.sort(key=lambda d: d['center_x'])
    return detections


def _report_detections(detections):
    """Print every de-duplicated detection followed by the recognised text."""
    # An empty list covers both "no boxes object" and "boxes object with zero entries".
    if not detections:
        print("未检测到任何目标")
        return

    print(f"检测到 {len(detections)} 个目标(已按从左到右排序):")
    print("-" * 50)

    valid_indices = filter_overlapping_detections(detections, overlap_threshold=0.5)
    for idx, det_idx in enumerate(valid_indices, 1):
        det = detections[det_idx]
        print(f"目标 {idx}:")
        print(f" 分类ID: {det['class_id']}")
        print(f" 分类名称: {det['class_name']}")
        print(f" 置信度: {det['confidence']:.4f}")
        print(
            f" 边界框: x1={det['box'][0]:.1f}, y1={det['box'][1]:.1f}, x2={det['box'][2]:.1f}, y2={det['box'][3]:.1f}")
        print(f" 中心点x坐标: {det['center_x']:.1f}")
        print("-" * 50)

    # Each class name is prefixed with a space, matching the original output format.
    all_class_names = "".join(f" {detections[i]['class_name']}" for i in valid_indices)
    print(all_class_names)


def main():
    """Download a captcha, run the model on it and print the recognised characters.

    Uses the module-level ``model`` and ``random_str`` defined earlier in this script.
    """
    # Captcha endpoint of the Java pig4cloud admin backend.
    url = f"https://www.xxxx.cn/auth/code/image?randomStr={random_str}"
    _download_captcha(url)

    results = model("codes.png")
    for result in results:
        _report_detections(_collect_detections(result))


if __name__ == "__main__":
    main()

  不要问我这段 Python 代码是什么意思,我也不懂,是 AI 给我生成的!!!

 

简述下遇到的问题:

1.训练图片数量比较少,准确率低

2.标注错了好多图片,导致准确率非常低

3.由于数字是空心的,导致部分数字被重复识别出来,例如 3+2=? 的图片识别出来是 3 3 + 2 = ?。这个时候 class_id 重复,且检测框重叠超过 50%,我就丢弃后面重复的结果,得到 3 + 2 = ?

posted @ 2026-04-28 16:10  hujunmin  阅读(9)  评论(0)    收藏  举报