A Hands-On Guide to Building a Captcha Recognition System with Deep Learning
1. Project Overview
This tutorial walks you through building a complete captcha recognition system from scratch, covering the full pipeline: data collection, model training, performance optimization, and deployment. We will implement an efficient captcha recognition model with the PyTorch framework and expose it as a usable API service.
2. Environment Setup
First, make sure the following dependencies are installed:
bash
pip install torch torchvision pillow opencv-python numpy matplotlib flask requests tqdm efficientnet_pytorch
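The model in Section 4 depends on the efficientnet_pytorch package, so it is included in the command above. An optional import check confirms the environment is ready; the last line simply reports whether a GPU will be used:
python
import torch
import torchvision
from efficientnet_pytorch import EfficientNet

print("torch:", torch.__version__, "| torchvision:", torchvision.__version__)
print("CUDA available:", torch.cuda.is_available())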
3. Data Preparation
3.1 Collecting Real Captchas
Create data_collector.py:
python
import requests
import os
import time
import random

def download_captchas(url, num_samples=1000, output_dir="real_captchas"):
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    session = requests.Session()
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
    }
    for i in range(num_samples):
        try:
            # Fetch the captcha image
            response = session.get(url, headers=headers)
            img_path = os.path.join(output_dir, f"captcha_{int(time.time())}_{random.randint(1000, 9999)}.jpg")
            with open(img_path, 'wb') as f:
                f.write(response.content)
            # Report progress
            if (i + 1) % 100 == 0:
                print(f"Downloaded {i + 1}/{num_samples} captchas")
            # Throttle requests to avoid hammering the server
            time.sleep(random.uniform(0.5, 1.5))
        except Exception as e:
            print(f"Download failed: {e}")
            continue

if __name__ == "__main__":
    target_url = "https://example.com/captcha.jpg"  # Replace with the actual target site
    download_captchas(target_url, 2000)
3.2 A Data Labeling Tool
Create label_tool.py:
python
import cv2
import os
import json
from pathlib import Path

class CaptchaLabeler:
    def __init__(self, image_dir):
        self.image_dir = image_dir
        self.image_files = [f for f in os.listdir(image_dir) if f.endswith(('.jpg', '.png'))]
        self.current_index = 0
        self.labels = {}
        self.load_existing_labels()

    def load_existing_labels(self):
        label_file = Path(self.image_dir) / "labels.json"
        if label_file.exists():
            with open(label_file, 'r') as f:
                self.labels = json.load(f)

    def save_labels(self):
        label_file = Path(self.image_dir) / "labels.json"
        with open(label_file, 'w') as f:
            json.dump(self.labels, f, indent=4)

    def label_images(self):
        cv2.namedWindow("Captcha Labeler", cv2.WINDOW_NORMAL)
        while self.current_index < len(self.image_files):
            img_name = self.image_files[self.current_index]
            img_path = os.path.join(self.image_dir, img_name)
            # Skip images that are already labeled
            if img_name in self.labels:
                self.current_index += 1
                continue
            image = cv2.imread(img_path)
            if image is None:
                print(f"Failed to load image: {img_path}")
                self.current_index += 1
                continue
            # Show the image alongside the prompt
            cv2.imshow("Captcha Labeler", image)
            cv2.waitKey(1)  # Give the window a chance to render
            print(f"Current image: {img_name}")
            print("Enter the captcha text ('skip' to skip, 'quit' to exit):")
            user_input = input().strip()
            if user_input.lower() == 'quit':
                break
            elif user_input.lower() == 'skip':
                self.current_index += 1
                continue
            else:
                self.labels[img_name] = user_input
                self.current_index += 1
        cv2.destroyAllWindows()
        self.save_labels()
        print("Labeling complete!")

if __name__ == "__main__":
    labeler = CaptchaLabeler("real_captchas")
    labeler.label_images()
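The training and deployment scripts below import get_data_loaders, CHAR_SET, and IDX_TO_CHAR from a data_loader.py module that this guide does not otherwise show. The following is a minimal sketch of that module, assuming fixed-length captchas drawn from digits and lowercase letters (so default batching can stack the targets) and labeled via the labels.json file produced above:
python
import json
import os
import string

import torch
from PIL import Image
from torch.utils.data import DataLoader, Dataset, random_split
from torchvision import transforms

# Assumed character set: digits plus lowercase letters (36 classes)
CHAR_SET = string.digits + string.ascii_lowercase
CHAR_TO_IDX = {c: i for i, c in enumerate(CHAR_SET)}
IDX_TO_CHAR = {i: c for i, c in enumerate(CHAR_SET)}

class CaptchaDataset(Dataset):
    def __init__(self, image_dir):
        with open(os.path.join(image_dir, "labels.json")) as f:
            self.labels = json.load(f)
        self.image_dir = image_dir
        self.files = sorted(self.labels.keys())
        # Must match the preprocessing used at inference time
        self.transform = transforms.Compose([
            transforms.Resize((224, 224)),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
        ])

    def __len__(self):
        return len(self.files)

    def __getitem__(self, idx):
        name = self.files[idx]
        image = Image.open(os.path.join(self.image_dir, name)).convert('RGB')
        text = self.labels[name].lower()  # assumes a case-insensitive captcha
        # Assumes every captcha has the same length, so default
        # batching can stack the targets into a [B, L] tensor
        target = torch.tensor([CHAR_TO_IDX[c] for c in text], dtype=torch.long)
        return self.transform(image), target, text

def get_data_loaders(batch_size, image_dir="real_captchas", val_ratio=0.1):
    dataset = CaptchaDataset(image_dir)
    val_size = max(1, int(len(dataset) * val_ratio))
    train_set, val_set = random_split(dataset, [len(dataset) - val_size, val_size])
    train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_set, batch_size=batch_size)
    return train_loader, val_loader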
4. An Advanced Model Architecture
Create advanced_model.py:
python
import torch
import torch.nn as nn
from efficientnet_pytorch import EfficientNet

class HybridCaptchaModel(nn.Module):
    def __init__(self, num_chars, pretrained=True):
        super().__init__()
        # EfficientNet backbone as the feature extractor
        self.backbone = EfficientNet.from_pretrained('efficientnet-b3') if pretrained else EfficientNet.from_name('efficientnet-b3')
        # Drop the classification head; we only need the feature maps
        in_features = self.backbone._fc.in_features
        self.backbone._fc = nn.Identity()
        # Per-timestep attention
        self.attention = nn.Sequential(
            nn.Linear(in_features, in_features // 2),
            nn.ReLU(),
            nn.Linear(in_features // 2, 1),
            nn.Sigmoid()
        )
        # Bidirectional GRU over the horizontal feature sequence
        self.gru = nn.GRU(
            input_size=in_features,
            hidden_size=512,
            num_layers=3,
            bidirectional=True,
            dropout=0.3
        )
        # Output layer (2 * hidden_size because the GRU is bidirectional)
        self.fc = nn.Linear(1024, num_chars)
        # Initialize weights
        self._init_weights()

    def _init_weights(self):
        for name, param in self.gru.named_parameters():
            if 'weight' in name:
                nn.init.orthogonal_(param)
        nn.init.kaiming_normal_(self.fc.weight)

    def forward(self, x):
        # Extract the spatial feature map rather than pooled features,
        # so the image becomes a left-to-right sequence for CTC
        features = self.backbone.extract_features(x)  # [B, C, H, W]
        features = features.mean(dim=2)               # pool over height: [B, C, W]
        features = features.permute(2, 0, 1)          # sequence-first: [W, B, C]
        # Attention weight per timestep
        attn_weights = self.attention(features)       # [W, B, 1]
        features = features * attn_weights
        # RNN over the sequence
        rnn_out, _ = self.gru(features)               # [W, B, 2*H]
        # Per-timestep class scores, in the [T, B, num_chars] layout CTC expects
        output = self.fc(rnn_out)
        return output
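Before training, it is worth sanity-checking the output layout, since nn.CTCLoss expects log-probabilities shaped [T, B, num_classes]. A quick check (the 37 below assumes the 36-character set sketched earlier plus one CTC blank):
python
import torch
from advanced_model import HybridCaptchaModel

model = HybridCaptchaModel(num_chars=37, pretrained=False)
model.eval()
with torch.no_grad():
    out = model(torch.randn(2, 3, 224, 224))
print(out.shape)  # expected: torch.Size([7, 2, 37]); efficientnet-b3 downsamples 224px to 7 timesteps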
5. Model Training and Optimization
Create train_advanced.py:
python
import torch
import torch.nn as nn
from torch.optim import AdamW
from torch.optim.lr_scheduler import CosineAnnealingLR
from advanced_model import HybridCaptchaModel
from data_loader import get_data_loaders, CHAR_SET, IDX_TO_CHAR
from tqdm import tqdm
import warnings
warnings.filterwarnings('ignore')
# Training configuration
class Config:
    batch_size = 32
    epochs = 50
    lr = 3e-4
    min_lr = 1e-6
    weight_decay = 1e-4
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model_save_path = 'best_model.pth'
    early_stop_patience = 5
def train_model():
    # Setup
    config = Config()
    train_loader, val_loader = get_data_loaders(config.batch_size)
    # +1 output class for the CTC blank symbol
    model = HybridCaptchaModel(len(CHAR_SET) + 1).to(config.device)
    # Optimizer and learning-rate schedule
    optimizer = AdamW(
        model.parameters(),
        lr=config.lr,
        weight_decay=config.weight_decay
    )
    scheduler = CosineAnnealingLR(
        optimizer,
        T_max=config.epochs,
        eta_min=config.min_lr
    )
    # Loss function: the blank index is len(CHAR_SET), the last class
    criterion = nn.CTCLoss(blank=len(CHAR_SET))
    # Training loop
    best_val_loss = float('inf')
    patience_counter = 0
    for epoch in range(config.epochs):
        # Training phase
        model.train()
        train_loss = 0
        progress_bar = tqdm(train_loader, desc=f'Epoch {epoch+1}/{config.epochs} [Train]')
        for images, targets, _ in progress_bar:
            images = images.to(config.device)
            targets = targets.to(config.device)
            optimizer.zero_grad()
            outputs = model(images)  # [T, B, num_chars + 1]
            # CTC input/target lengths (targets are fixed-length)
            input_lengths = torch.tensor([outputs.size(0)] * outputs.size(1), device=config.device)
            target_lengths = torch.tensor([targets.size(1)] * targets.size(0), device=config.device)
            loss = criterion(
                outputs.log_softmax(2),
                targets,
                input_lengths,
                target_lengths
            )
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
            optimizer.step()
            train_loss += loss.item()
            progress_bar.set_postfix({'loss': loss.item()})
        # Validation phase
        model.eval()
        val_loss = 0
        correct = 0
        total = 0
        with torch.no_grad():
            for images, targets, target_strs in val_loader:
                images = images.to(config.device)
                targets = targets.to(config.device)
                outputs = model(images)
                # Validation loss
                input_lengths = torch.tensor([outputs.size(0)] * outputs.size(1), device=config.device)
                target_lengths = torch.tensor([targets.size(1)] * targets.size(0), device=config.device)
                loss = criterion(
                    outputs.log_softmax(2),
                    targets,
                    input_lengths,
                    target_lengths
                )
                val_loss += loss.item()
                # Sequence accuracy via greedy CTC decoding
                _, predicted = torch.max(outputs, 2)   # [T, B]
                predicted = predicted.transpose(0, 1)  # [B, T]
                blank = len(CHAR_SET)
                for i in range(len(target_strs)):
                    seq = predicted[i].tolist()
                    # Collapse repeats, then drop blanks
                    pred_str = ''.join(IDX_TO_CHAR[p] for j, p in enumerate(seq)
                                       if p != blank and (j == 0 or p != seq[j - 1]))
                    if pred_str == target_strs[i]:
                        correct += 1
                    total += 1
        # Update the learning rate
        scheduler.step()
        # Epoch statistics
        train_loss /= len(train_loader)
        val_loss /= len(val_loader)
        val_acc = correct / total
        print(f"\nEpoch {epoch+1} Summary:")
        print(f"Train Loss: {train_loss:.4f} | Val Loss: {val_loss:.4f} | Val Acc: {val_acc:.4f}")
        # Early stopping and checkpointing
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            torch.save(model.state_dict(), config.model_save_path)
            patience_counter = 0
            print(f"Model saved, validation loss: {val_loss:.4f}")
        else:
            patience_counter += 1
            if patience_counter >= config.early_stop_patience:
                print("Early stopping triggered, ending training")
                break

if __name__ == "__main__":
    train_model()
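Once training finishes, a quick way to sanity-check the saved weights is to decode a single image outside the training loop. A minimal sketch, assuming the data_loader.py definitions above and an illustrative image path:
python
import torch
from PIL import Image
from torchvision import transforms
from advanced_model import HybridCaptchaModel
from data_loader import CHAR_SET, IDX_TO_CHAR

model = HybridCaptchaModel(len(CHAR_SET) + 1)  # +1 for the CTC blank
model.load_state_dict(torch.load('best_model.pth', map_location='cpu'))
model.eval()

transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
])

image = transform(Image.open('sample_captcha.jpg').convert('RGB')).unsqueeze(0)
with torch.no_grad():
    out = model(image)  # [T, 1, num_chars + 1]
seq = out.argmax(2).squeeze(1).tolist()
blank = len(CHAR_SET)
# Greedy CTC decode: collapse repeats, then drop blanks
text = ''.join(IDX_TO_CHAR[p] for i, p in enumerate(seq)
               if p != blank and (i == 0 or p != seq[i - 1]))
print("Prediction:", text)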
6. Deployment and API Service
Create deployment.py:
python
from flask import Flask, request, jsonify
import torch
from advanced_model import HybridCaptchaModel
from data_loader import CHAR_SET, IDX_TO_CHAR
from PIL import Image
import io
from torchvision import transforms
import time

app = Flask(__name__)

# Load the model (+1 output class for the CTC blank)
model = HybridCaptchaModel(len(CHAR_SET) + 1)
model.load_state_dict(torch.load('best_model.pth', map_location='cpu'))
model.eval()

# Image preprocessing (must match training)
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

def decode_output(output):
    """Greedy CTC decode for one image: collapse repeats, drop blanks.

    Returns the decoded string and a simple confidence proxy
    (mean per-timestep max probability).
    """
    probs = output.softmax(2).squeeze(1)  # [T, num_chars + 1]
    seq = probs.argmax(1).tolist()
    blank = len(CHAR_SET)
    text = ''.join(IDX_TO_CHAR[p] for j, p in enumerate(seq)
                   if p != blank and (j == 0 or p != seq[j - 1]))
    confidence = probs.max(1).values.mean().item()
    return text, confidence

@app.route('/predict', methods=['POST'])
def predict():
    start_time = time.time()
    if 'file' not in request.files:
        return jsonify({'error': 'No file provided'}), 400
    file = request.files['file']
    if file.filename == '':
        return jsonify({'error': 'Empty filename'}), 400
    try:
        # Read and preprocess the image
        image_bytes = file.read()
        image = Image.open(io.BytesIO(image_bytes)).convert('RGB')
        image = transform(image).unsqueeze(0)
        # Predict
        with torch.no_grad():
            output = model(image)  # [T, 1, num_chars + 1]
        captcha_text, confidence = decode_output(output)
        processing_time = time.time() - start_time
        return jsonify({
            'prediction': captcha_text,
            'confidence': round(confidence, 4),
            'processing_time': f"{processing_time:.3f}s"
        })
    except Exception as e:
        return jsonify({'error': str(e)}), 500

@app.route('/batch_predict', methods=['POST'])
def batch_predict():
    start_time = time.time()
    if 'files' not in request.files:
        return jsonify({'error': 'No files provided'}), 400
    files = request.files.getlist('files')
    if not files:
        return jsonify({'error': 'Empty file list'}), 400
    try:
        results = []
        for file in files:
            # Read and preprocess each image
            image_bytes = file.read()
            image = Image.open(io.BytesIO(image_bytes)).convert('RGB')
            image = transform(image).unsqueeze(0)
            # Predict
            with torch.no_grad():
                output = model(image)
            captcha_text, _ = decode_output(output)
            results.append({
                'filename': file.filename,
                'prediction': captcha_text
            })
        processing_time = time.time() - start_time
        return jsonify({
            'results': results,
            'total_time': f"{processing_time:.3f}s",
            'avg_time': f"{processing_time/len(files):.3f}s"
        })
    except Exception as e:
        return jsonify({'error': str(e)}), 500

if __name__ == '__main__':
    app.run(host='0.0.0.0', port=5000, threaded=True)
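With the service running, the endpoints can be exercised with curl; the file names below are illustrative:
bash
# Single image
curl -X POST -F "file=@test_captcha.jpg" http://localhost:5000/predict
# Several images in one request
curl -X POST -F "files=@a.jpg" -F "files=@b.jpg" http://localhost:5000/batch_predict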
7. Performance Optimization Tips
7.1 Model Quantization
python
# Dynamically quantize the model to speed up CPU inference
import torch
from advanced_model import HybridCaptchaModel
from data_loader import CHAR_SET

def quantize_model():
    model = HybridCaptchaModel(len(CHAR_SET) + 1)  # +1 for the CTC blank
    model.load_state_dict(torch.load('best_model.pth', map_location='cpu'))
    model.eval()
    # Dynamic quantization of the Linear and GRU layers
    quantized_model = torch.quantization.quantize_dynamic(
        model,
        {torch.nn.Linear, torch.nn.GRU},
        dtype=torch.qint8
    )
    # Save the quantized weights
    torch.save(quantized_model.state_dict(), 'quantized_model.pth')
    print("Model quantization complete!")
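To confirm that quantization actually helps on your hardware, a simple before/after latency comparison can be run. This is a rough sketch, and the numbers will vary by machine:
python
import time
import torch

def benchmark(model, runs=50):
    # Mean single-image CPU latency over several runs
    x = torch.randn(1, 3, 224, 224)
    with torch.no_grad():
        model(x)  # warm-up pass
        start = time.time()
        for _ in range(runs):
            model(x)
    return (time.time() - start) / runs

# Example (inside quantize_model): compare benchmark(model) with benchmark(quantized_model)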
7.2 Exporting to ONNX
python
import torch
from advanced_model import HybridCaptchaModel
from data_loader import CHAR_SET

def export_to_onnx():
    model = HybridCaptchaModel(len(CHAR_SET) + 1)  # +1 for the CTC blank
    model.load_state_dict(torch.load('best_model.pth', map_location='cpu'))
    model.eval()
    # efficientnet_pytorch requires the non-memory-efficient Swish for export
    model.backbone.set_swish(memory_efficient=False)
    dummy_input = torch.randn(1, 3, 224, 224)
    torch.onnx.export(
        model,
        dummy_input,
        "captcha_model.onnx",
        input_names=["input"],
        output_names=["output"],
        dynamic_axes={
            "input": {0: "batch_size"},
            # Output layout is [T, B, num_chars + 1], so batch is axis 1
            "output": {1: "batch_size"}
        },
        opset_version=13
    )
    print("ONNX model exported successfully!")
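The exported model can then be served without PyTorch via ONNX Runtime (pip install onnxruntime); a minimal check, assuming the file produced above:
python
import numpy as np
import onnxruntime as ort

session = ort.InferenceSession("captcha_model.onnx")
dummy = np.random.randn(1, 3, 224, 224).astype(np.float32)
(output,) = session.run(["output"], {"input": dummy})
print(output.shape)  # [T, 1, num_chars + 1], matching the PyTorch model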
