基于深度学习的验证码识别系统实战指南

一、项目概述
本教程将带您从零开始构建一个完整的验证码识别系统,涵盖数据采集、模型训练、性能优化和实际部署全流程。我们将使用PyTorch框架实现一个高效的验证码识别模型,并最终将其部署为可用的API服务。

二、环境配置
首先确保安装以下依赖:

```bash
pip install torch torchvision efficientnet_pytorch pillow opencv-python numpy matplotlib flask requests tqdm
```
三、数据准备
3.1 真实验证码采集
创建data_collector.py:

python
import requests
from PIL import Image
import os
import time
import random

def download_captchas(url, num_samples=1000, output_dir="real_captchas"):
    """Download captcha images from ``url`` into ``output_dir``.

    Each successful response body is written to a file named
    ``captcha_<unix-seconds>_<4-digit-random>.jpg``. Failed requests are
    reported on stdout and skipped; they never abort the run.

    Args:
        url: Endpoint that returns one captcha image per GET request.
        num_samples: Number of download attempts to make.
        output_dir: Directory to store the images in (created if missing).
    """
    os.makedirs(output_dir, exist_ok=True)

    session = requests.Session()
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
    }

    for i in range(num_samples):
        try:
            # Fetch one captcha image. The timeout stops a stalled server
            # from hanging the whole run.
            response = session.get(url, headers=headers, timeout=10)
            # Do not store an HTML error page as if it were an image.
            response.raise_for_status()

            img_path = os.path.join(
                output_dir,
                f"captcha_{int(time.time())}_{random.randint(1000,9999)}.jpg"
            )
            with open(img_path, 'wb') as f:
                f.write(response.content)

            # Progress report every 100 attempts.
            if (i+1) % 100 == 0:
                print(f"已下载 {i+1}/{num_samples} 张验证码")
        except (requests.RequestException, OSError) as e:
            print(f"下载失败: {e}")

        # Throttle after every attempt, including failed ones, so a failing
        # server is not hit in a tight loop.
        time.sleep(random.uniform(0.5, 1.5))

# Script entry point: only runs when the file is executed directly.
if __name__ == "__main__":
    target_url = "https://example.com/captcha.jpg"  # replace with the real target site
    download_captchas(target_url, 2000)
3.2 数据标注工具
创建label_tool.py:

python
import cv2
import os
import json
from pathlib import Path

class CaptchaLabeler:
    """Console-driven labeling tool for a directory of captcha images.

    Labels are kept in ``<image_dir>/labels.json`` as a mapping from image
    file name to the text typed by the user. Images that already have a
    label are skipped, so labeling can be resumed across sessions.
    """

    def __init__(self, image_dir):
        """Index the ``.jpg``/``.png`` files in ``image_dir`` and load saved labels.

        Args:
            image_dir: Directory that holds the images and ``labels.json``.
        """
        self.image_dir = image_dir
        # os.listdir order is arbitrary; sort so sessions are reproducible.
        self.image_files = sorted(
            f for f in os.listdir(image_dir) if f.endswith(('.jpg', '.png'))
        )
        self.current_index = 0
        self.labels = {}
        self.load_existing_labels()

    def load_existing_labels(self):
        """Load ``labels.json`` from the image directory if it exists."""
        label_file = Path(self.image_dir) / "labels.json"
        if label_file.exists():
            with open(label_file, 'r', encoding='utf-8') as f:
                self.labels = json.load(f)

    def save_labels(self):
        """Write the current labels to ``labels.json`` in the image directory."""
        label_file = Path(self.image_dir) / "labels.json"
        with open(label_file, 'w', encoding='utf-8') as f:
            json.dump(self.labels, f, indent=4)

    def label_images(self):
        """Show each unlabeled image and read its label from standard input.

        Typing ``skip`` moves on without labeling and ``quit`` stops the
        session. Labels are saved when the loop ends, including when it
        ends because of an exception such as Ctrl-C.
        """
        cv2.namedWindow("Captcha Labeler", cv2.WINDOW_NORMAL)

        try:
            while self.current_index < len(self.image_files):
                img_name = self.image_files[self.current_index]
                img_path = os.path.join(self.image_dir, img_name)

                # Skip images that already have a label.
                if img_name in self.labels:
                    self.current_index += 1
                    continue

                image = cv2.imread(img_path)
                if image is None:
                    print(f"无法加载图片: {img_path}")
                    self.current_index += 1
                    continue

                # Show the image and the prompt. HighGUI only paints the
                # window while its event loop runs, so pump it once before
                # blocking on input().
                cv2.imshow("Captcha Labeler", image)
                cv2.waitKey(1)
                print(f"当前图片: {img_name}")
                print("请输入验证码内容(输入'skip'跳过,'quit'退出):")

                user_input = input().strip()
                command = user_input.lower()

                if command == 'quit':
                    break
                if command != 'skip':
                    self.labels[img_name] = user_input
                self.current_index += 1
        finally:
            # Persist whatever was labeled even if the session is interrupted.
            cv2.destroyAllWindows()
            self.save_labels()

        print("标注完成!")

# Script entry point: label everything in the default download directory.
if __name__ == "__main__":
    labeler = CaptchaLabeler("real_captchas")
    labeler.label_images()
四、高级模型架构
创建advanced_model.py:

python
import torch
import torch.nn as nn
import torch.nn.functional as F
from efficientnet_pytorch import EfficientNet

class HybridCaptchaModel(nn.Module):
    """EfficientNet-B3 features -> sigmoid gate -> bidirectional GRU -> linear head.

    Args:
        num_chars: Number of real character classes. The head emits
            ``num_chars + 1`` logits; the extra last index is the CTC blank,
            matching ``CTCLoss(blank=len(CHAR_SET))`` in the training script
            and the ``p < len(CHAR_SET)`` filter in the decoding code.
        pretrained: Load ImageNet weights for the backbone when True,
            otherwise build the architecture with random weights.

    NOTE(review): ``forward`` feeds the GRU a sequence of length 1 (the
    pooled feature vector), so the output has a single time step. CTC cannot
    emit more labels than time steps; confirm this is intended for
    multi-character captchas.
    """

    def __init__(self, num_chars, pretrained=True):
        super().__init__()

        # EfficientNet as the feature extractor.
        if pretrained:
            self.backbone = EfficientNet.from_pretrained('efficientnet-b3')
        else:
            self.backbone = EfficientNet.from_name('efficientnet-b3')

        # Replace the classifier with identity so the backbone returns features.
        in_features = self.backbone._fc.in_features
        self.backbone._fc = nn.Identity()

        # Attention: one scalar gate in (0, 1) per sample.
        self.attention = nn.Sequential(
            nn.Linear(in_features, in_features//2),
            nn.ReLU(),
            nn.Linear(in_features//2, 1),
            nn.Sigmoid()
        )

        # Bidirectional GRU.
        self.gru = nn.GRU(
            input_size=in_features,
            hidden_size=512,
            num_layers=3,
            bidirectional=True,
            dropout=0.3
        )

        # Output layer: 2 * hidden_size inputs, one extra class for the CTC blank.
        self.fc = nn.Linear(1024, num_chars + 1)

        # Initialize weights.
        self._init_weights()

    def _init_weights(self):
        """Orthogonal init for the GRU weight matrices, Kaiming-normal for the head."""
        for name, param in self.gru.named_parameters():
            if 'weight' in name:
                nn.init.orthogonal_(param)
        nn.init.kaiming_normal_(self.fc.weight)

    def forward(self, x):
        """Return per-step class logits, batch first.

        Args:
            x: Image batch accepted by the backbone.

        Returns:
            Logits shaped ``[B, 1, C]`` where ``C`` is ``fc``'s output size.
        """
        # Extract features.
        features = self.backbone(x)  # [B, C]
        features = features.unsqueeze(1)  # [B, 1, C]

        # Apply attention.
        attn_weights = self.attention(features.squeeze(1))  # [B, 1]
        features = features * attn_weights.unsqueeze(-1)  # [B, 1, C]

        # The GRU is time-major (batch_first is left at its default).
        features = features.transpose(0, 1)  # [1, B, C]
        rnn_out, _ = self.gru(features)  # [1, B, 2*H]

        # Project to class logits and return batch first.
        output = self.fc(rnn_out)  # [1, B, classes]
        output = output.permute(1, 0, 2)  # [B, 1, classes]

        return output
五、模型训练与优化
创建train_advanced.py:

python
import torch
from torch.utils.data import DataLoader
from torch.optim import AdamW
from torch.optim.lr_scheduler import CosineAnnealingLR
from advanced_model import HybridCaptchaModel
from data_loader import get_data_loaders
import numpy as np
from tqdm import tqdm
import warnings
warnings.filterwarnings('ignore')

# Training configuration
class Config:
    """Hyper-parameters and paths read by ``train_model``."""

    batch_size = 32
    epochs = 50  # also used as T_max of the cosine schedule
    lr = 3e-4  # initial AdamW learning rate
    min_lr = 1e-6  # eta_min of the cosine schedule
    weight_decay = 1e-4
    # Use the GPU when one is available, otherwise fall back to the CPU.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model_save_path = 'best_model.pth'
    # Epochs without validation-loss improvement before training stops.
    early_stop_patience = 5

def _ctc_loss(criterion, outputs, targets):
    """Compute the CTC loss for batch-first logits ``outputs`` ([B, T, C])."""
    # CTCLoss expects time-major log-probabilities: [T, B, C].
    log_probs = outputs.permute(1, 0, 2).log_softmax(2)
    seq_len, batch_size = log_probs.size(0), log_probs.size(1)
    input_lengths = torch.full(
        (batch_size,), seq_len, dtype=torch.long, device=outputs.device
    )
    # assumes targets is a padded [B, S] tensor of fixed-length labels,
    # as the original length computation did; TODO confirm against data_loader
    target_lengths = torch.full(
        (targets.size(0),), targets.size(1), dtype=torch.long, device=outputs.device
    )
    return criterion(log_probs, targets, input_lengths, target_lengths)


def _greedy_ctc_decode(indices, num_chars, idx_to_char):
    """Greedy CTC decoding: collapse repeated indices, then drop the blank."""
    chars = []
    previous = None
    for idx in indices:
        # Indices >= num_chars are the CTC blank.
        if idx != previous and idx < num_chars:
            chars.append(idx_to_char[idx])
        previous = idx
    return ''.join(chars)


def train_model():
    """Train ``HybridCaptchaModel`` with CTC loss, checkpointing and early stopping.

    The weights with the lowest validation loss are saved to
    ``Config.model_save_path``. Training stops once the validation loss has
    not improved for ``Config.early_stop_patience`` consecutive epochs.

    NOTE(review): ``CHAR_SET`` and ``IDX_TO_CHAR`` are not defined or
    imported in this script; they must be brought into scope (presumably
    from ``data_loader``; verify).
    """
    # Setup.
    config = Config()
    train_loader, val_loader = get_data_loaders(config.batch_size)
    num_chars = len(CHAR_SET)
    model = HybridCaptchaModel(num_chars).to(config.device)

    # Optimizer and learning-rate schedule.
    optimizer = AdamW(
        model.parameters(),
        lr=config.lr,
        weight_decay=config.weight_decay
    )
    scheduler = CosineAnnealingLR(
        optimizer,
        T_max=config.epochs,
        eta_min=config.min_lr
    )

    # Loss function. Only ``torch`` is imported here, so reach CTCLoss through it.
    criterion = torch.nn.CTCLoss(blank=num_chars)

    best_val_loss = float('inf')
    patience_counter = 0

    for epoch in range(config.epochs):
        # Training phase.
        model.train()
        train_loss = 0
        progress_bar = tqdm(train_loader, desc=f'Epoch {epoch+1}/{config.epochs} [Train]')

        for images, targets, _ in progress_bar:
            images = images.to(config.device)
            targets = targets.to(config.device)

            optimizer.zero_grad()
            outputs = model(images)
            loss = _ctc_loss(criterion, outputs, targets)

            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
            optimizer.step()

            train_loss += loss.item()
            progress_bar.set_postfix({'loss': loss.item()})

        # Validation phase.
        model.eval()
        val_loss = 0
        correct = 0
        total = 0

        with torch.no_grad():
            for images, targets, target_strs in val_loader:
                images = images.to(config.device)
                targets = targets.to(config.device)

                outputs = model(images)
                val_loss += _ctc_loss(criterion, outputs, targets).item()

                # Exact-match accuracy. argmax over classes gives [B, T];
                # tolist() yields plain ints usable as lookup keys.
                predicted = outputs.argmax(dim=2).tolist()
                for indices, target_str in zip(predicted, target_strs):
                    pred_str = _greedy_ctc_decode(indices, num_chars, IDX_TO_CHAR)
                    if pred_str == target_str:
                        correct += 1
                    total += 1

        # Step the learning-rate schedule once per epoch.
        scheduler.step()

        # Epoch statistics.
        train_loss /= len(train_loader)
        val_loss /= len(val_loader)
        val_acc = correct / total if total else 0.0

        print(f"\nEpoch {epoch+1} Summary:")
        print(f"Train Loss: {train_loss:.4f} | Val Loss: {val_loss:.4f} | Val Acc: {val_acc:.4f}")

        # Checkpointing and early stopping.
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            torch.save(model.state_dict(), config.model_save_path)
            patience_counter = 0
            print(f"模型已保存,验证损失: {val_loss:.4f}")
        else:
            patience_counter += 1
            if patience_counter >= config.early_stop_patience:
                print("早停触发,训练终止")
                break

# Script entry point.
if __name__ == "__main__":
    train_model()
六、模型部署与API服务
创建deployment.py:

python
from flask import Flask, request, jsonify
import torch
from advanced_model import HybridCaptchaModel
from PIL import Image
import io
import numpy as np
from torchvision import transforms
import time

app = Flask(__name__)

# Load the model once at import time.
# NOTE(review): CHAR_SET and IDX_TO_CHAR are not defined or imported in this
# file; they must be brought into scope with the values used for training.
# pretrained=False builds the same architecture without downloading ImageNet
# weights that load_state_dict would overwrite immediately.
model = HybridCaptchaModel(len(CHAR_SET), pretrained=False)
model.load_state_dict(torch.load('best_model.pth', map_location='cpu'))
model.eval()

# Image preprocessing: resize to 224x224, convert to tensor, normalize per channel.
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

@app.route('/predict', methods=['POST'])
def predict():
    """Recognize one captcha uploaded as multipart field ``file``.

    Returns:
        JSON with ``prediction``, ``confidence`` and ``processing_time`` on
        success; ``{'error': ...}`` with status 400 when the upload is
        missing or unnamed, or status 500 when processing fails.
    """
    start_time = time.time()

    if 'file' not in request.files:
        return jsonify({'error': 'No file provided'}), 400

    file = request.files['file']
    if file.filename == '':
        return jsonify({'error': 'Empty filename'}), 400

    try:
        # Read and preprocess the image.
        image_bytes = file.read()
        image = Image.open(io.BytesIO(image_bytes)).convert('RGB')
        image = transform(image).unsqueeze(0)

        # Predict.
        with torch.no_grad():
            output = model(image)
        # Per-step best class and its softmax probability.
        probs, predicted = output.softmax(dim=2).max(dim=2)

        # Greedy CTC decoding: collapse repeats, drop blank indices
        # (>= len(CHAR_SET)). tolist() yields plain ints for the lookup.
        num_chars = len(CHAR_SET)
        chars = []
        previous = None
        for idx in predicted.squeeze(0).tolist():
            if idx != previous and idx < num_chars:
                chars.append(IDX_TO_CHAR[idx])
            previous = idx
        captcha_text = ''.join(chars)

        # Confidence is the weakest per-step probability, so one uncertain
        # step lowers the whole prediction's score.
        confidence = round(probs.min().item(), 4)

        processing_time = time.time() - start_time

        return jsonify({
            'prediction': captcha_text,
            'confidence': confidence,
            'processing_time': f"{processing_time:.3f}s"
        })

    except Exception as e:
        # Top-level request boundary: report the failure to the client.
        return jsonify({'error': str(e)}), 500

@app.route('/batch_predict', methods=['POST'])
def batch_predict():
    """Recognize several captchas uploaded under multipart field ``files``.

    Returns:
        JSON with ``results`` (a ``filename``/``prediction`` pair per file),
        ``total_time`` and ``avg_time`` on success; ``{'error': ...}`` with
        status 400 when no files are sent, or status 500 when processing
        any file fails.
    """
    start_time = time.time()

    if 'files' not in request.files:
        return jsonify({'error': 'No files provided'}), 400

    files = request.files.getlist('files')
    if not files:
        return jsonify({'error': 'Empty file list'}), 400

    try:
        num_chars = len(CHAR_SET)
        results = []
        for file in files:
            # Read and preprocess the image.
            image_bytes = file.read()
            image = Image.open(io.BytesIO(image_bytes)).convert('RGB')
            image = transform(image).unsqueeze(0)

            # Predict.
            with torch.no_grad():
                output = model(image)
            predicted = output.argmax(dim=2).squeeze(0).tolist()

            # Greedy CTC decoding: collapse repeats, drop blank indices
            # (>= len(CHAR_SET)).
            chars = []
            previous = None
            for idx in predicted:
                if idx != previous and idx < num_chars:
                    chars.append(IDX_TO_CHAR[idx])
                previous = idx

            results.append({
                'filename': file.filename,
                'prediction': ''.join(chars)
            })

        processing_time = time.time() - start_time

        return jsonify({
            'results': results,
            'total_time': f"{processing_time:.3f}s",
            'avg_time': f"{processing_time/len(files):.3f}s"
        })

    except Exception as e:
        # Top-level request boundary: report the failure to the client.
        return jsonify({'error': str(e)}), 500

# Script entry point: serve on all interfaces, port 5000.
if __name__ == '__main__':
    app.run(host='0.0.0.0', port=5000, threaded=True)
七、性能优化技巧
7.1 模型量化
python

# Quantize the model to speed up inference.
def quantize_model():
    """Dynamically quantize the trained model and save its state dict.

    Loads ``best_model.pth``, converts the ``Linear`` and ``GRU`` modules to
    int8 dynamic quantization and writes ``quantized_model.pth``.
    """
    # pretrained=False: the checkpoint overwrites the weights anyway, so skip
    # the ImageNet download.
    model = HybridCaptchaModel(len(CHAR_SET), pretrained=False)
    # Dynamic quantization runs on CPU, so map a GPU-trained checkpoint to CPU.
    model.load_state_dict(torch.load('best_model.pth', map_location='cpu'))
    model.eval()

    # Dynamic quantization.
    quantized_model = torch.quantization.quantize_dynamic(
        model,
        {torch.nn.Linear, torch.nn.GRU},
        dtype=torch.qint8
    )

    # Save the quantized model.
    torch.save(quantized_model.state_dict(), 'quantized_model.pth')
    print("模型量化完成!")
7.2 ONNX导出
python
def export_to_onnx():
    """Export the trained model to ``captcha_model.onnx`` with a dynamic batch axis."""
    # pretrained=False: the checkpoint overwrites the weights anyway, so skip
    # the ImageNet download.
    model = HybridCaptchaModel(len(CHAR_SET), pretrained=False)
    # Map to CPU so a GPU-trained checkpoint loads on machines without CUDA
    # and matches the CPU dummy input below.
    model.load_state_dict(torch.load('best_model.pth', map_location='cpu'))
    model.eval()

    # Example input used to trace the graph.
    dummy_input = torch.randn(1, 3, 224, 224)

    torch.onnx.export(
        model,
        dummy_input,
        "captcha_model.onnx",
        input_names=["input"],
        output_names=["output"],
        dynamic_axes={
            "input": {0: "batch_size"},
            "output": {0: "batch_size"}
        },
        opset_version=13
    )
    print("ONNX模型导出成功!")

posted @ 2025-05-10 21:55  tmcor  阅读(216)  评论(0)    收藏  举报