A Hands-On Guide to Building a Captcha Recognition System with Deep Learning
1. Project Overview
This tutorial walks you through building a complete captcha recognition system from scratch, covering the full pipeline: data collection, model training, performance optimization, and deployment. We will implement an efficient captcha recognition model with the PyTorch framework and expose it as a usable API service.
2. Environment Setup
First, make sure the following dependencies are installed:
bash
pip install torch torchvision pillow opencv-python numpy matplotlib flask requests tqdm efficientnet_pytorch
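The model in Section 4 depends on the efficientnet_pytorch package, so it is included in the command above. An optional import check confirms the environment is ready; the last line simply reports whether a GPU will be used:
python
import torch
import torchvision
from efficientnet_pytorch import EfficientNet

print("torch:", torch.__version__, "| torchvision:", torchvision.__version__)
print("CUDA available:", torch.cuda.is_available())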
3. Data Preparation
3.1 Collecting Real Captchas
Create data_collector.py:
python
import requests
import os
import time
import random

def download_captchas(url, num_samples=1000, output_dir="real_captchas"):
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    session = requests.Session()
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
    }
    for i in range(num_samples):
        try:
            # Fetch the captcha image
            response = session.get(url, headers=headers)
            img_path = os.path.join(output_dir, f"captcha_{int(time.time())}_{random.randint(1000, 9999)}.jpg")
            with open(img_path, 'wb') as f:
                f.write(response.content)
            # Report progress
            if (i + 1) % 100 == 0:
                print(f"Downloaded {i + 1}/{num_samples} captchas")
            # Throttle requests to avoid hammering the server
            time.sleep(random.uniform(0.5, 1.5))
        except Exception as e:
            print(f"Download failed: {e}")
            continue

if __name__ == "__main__":
    target_url = "https://example.com/captcha.jpg"  # Replace with the actual target site
    download_captchas(target_url, 2000)
3.2 A Data Labeling Tool
Create label_tool.py:
python
import cv2
import os
import json
from pathlib import Path

class CaptchaLabeler:
    def __init__(self, image_dir):
        self.image_dir = image_dir
        self.image_files = [f for f in os.listdir(image_dir) if f.endswith(('.jpg', '.png'))]
        self.current_index = 0
        self.labels = {}
        self.load_existing_labels()

    def load_existing_labels(self):
        label_file = Path(self.image_dir) / "labels.json"
        if label_file.exists():
            with open(label_file, 'r') as f:
                self.labels = json.load(f)

    def save_labels(self):
        label_file = Path(self.image_dir) / "labels.json"
        with open(label_file, 'w') as f:
            json.dump(self.labels, f, indent=4)

    def label_images(self):
        cv2.namedWindow("Captcha Labeler", cv2.WINDOW_NORMAL)
        while self.current_index < len(self.image_files):
            img_name = self.image_files[self.current_index]
            img_path = os.path.join(self.image_dir, img_name)
            # Skip images that are already labeled
            if img_name in self.labels:
                self.current_index += 1
                continue
            image = cv2.imread(img_path)
            if image is None:
                print(f"Failed to load image: {img_path}")
                self.current_index += 1
                continue
            # Show the image alongside the prompt
            cv2.imshow("Captcha Labeler", image)
            cv2.waitKey(1)  # Give the window a chance to render
            print(f"Current image: {img_name}")
            print("Enter the captcha text ('skip' to skip, 'quit' to exit):")
            user_input = input().strip()
            if user_input.lower() == 'quit':
                break
            elif user_input.lower() == 'skip':
                self.current_index += 1
                continue
            else:
                self.labels[img_name] = user_input
                self.current_index += 1
        cv2.destroyAllWindows()
        self.save_labels()
        print("Labeling complete!")

if __name__ == "__main__":
    labeler = CaptchaLabeler("real_captchas")
    labeler.label_images()
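The training and deployment scripts below import get_data_loaders, CHAR_SET, and IDX_TO_CHAR from a data_loader.py module that this guide does not otherwise show. The following is a minimal sketch of that module, assuming fixed-length captchas drawn from digits and lowercase letters (so default batching can stack the targets) and labeled via the labels.json file produced above:
python
import json
import os
import string

import torch
from PIL import Image
from torch.utils.data import DataLoader, Dataset, random_split
from torchvision import transforms

# Assumed character set: digits plus lowercase letters (36 classes)
CHAR_SET = string.digits + string.ascii_lowercase
CHAR_TO_IDX = {c: i for i, c in enumerate(CHAR_SET)}
IDX_TO_CHAR = {i: c for i, c in enumerate(CHAR_SET)}

class CaptchaDataset(Dataset):
    def __init__(self, image_dir):
        with open(os.path.join(image_dir, "labels.json")) as f:
            self.labels = json.load(f)
        self.image_dir = image_dir
        self.files = sorted(self.labels.keys())
        # Must match the preprocessing used at inference time
        self.transform = transforms.Compose([
            transforms.Resize((224, 224)),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
        ])

    def __len__(self):
        return len(self.files)

    def __getitem__(self, idx):
        name = self.files[idx]
        image = Image.open(os.path.join(self.image_dir, name)).convert('RGB')
        text = self.labels[name].lower()  # assumes a case-insensitive captcha
        # Assumes every captcha has the same length, so default
        # batching can stack the targets into a [B, L] tensor
        target = torch.tensor([CHAR_TO_IDX[c] for c in text], dtype=torch.long)
        return self.transform(image), target, text

def get_data_loaders(batch_size, image_dir="real_captchas", val_ratio=0.1):
    dataset = CaptchaDataset(image_dir)
    val_size = max(1, int(len(dataset) * val_ratio))
    train_set, val_set = random_split(dataset, [len(dataset) - val_size, val_size])
    train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_set, batch_size=batch_size)
    return train_loader, val_loader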
4. An Advanced Model Architecture
Create advanced_model.py:
python
import torch
import torch.nn as nn
from efficientnet_pytorch import EfficientNet

class HybridCaptchaModel(nn.Module):
    def __init__(self, num_chars, pretrained=True):
        super().__init__()
        # EfficientNet backbone as the feature extractor
        self.backbone = EfficientNet.from_pretrained('efficientnet-b3') if pretrained else EfficientNet.from_name('efficientnet-b3')
        # Drop the classification head; we only need the feature maps
        in_features = self.backbone._fc.in_features
        self.backbone._fc = nn.Identity()
        # Per-timestep attention
        self.attention = nn.Sequential(
            nn.Linear(in_features, in_features // 2),
            nn.ReLU(),
            nn.Linear(in_features // 2, 1),
            nn.Sigmoid()
        )
        # Bidirectional GRU over the horizontal feature sequence
        self.gru = nn.GRU(
            input_size=in_features,
            hidden_size=512,
            num_layers=3,
            bidirectional=True,
            dropout=0.3
        )
        # Output layer (2 * hidden_size because the GRU is bidirectional)
        self.fc = nn.Linear(1024, num_chars)
        # Initialize weights
        self._init_weights()

    def _init_weights(self):
        for name, param in self.gru.named_parameters():
            if 'weight' in name:
                nn.init.orthogonal_(param)
        nn.init.kaiming_normal_(self.fc.weight)

    def forward(self, x):
        # Extract the spatial feature map rather than pooled features,
        # so the image becomes a left-to-right sequence for CTC
        features = self.backbone.extract_features(x)  # [B, C, H, W]
        features = features.mean(dim=2)               # pool over height: [B, C, W]
        features = features.permute(2, 0, 1)          # sequence-first: [W, B, C]
        # Attention weight per timestep
        attn_weights = self.attention(features)       # [W, B, 1]
        features = features * attn_weights
        # RNN over the sequence
        rnn_out, _ = self.gru(features)               # [W, B, 2*H]
        # Per-timestep class scores, in the [T, B, num_chars] layout CTC expects
        output = self.fc(rnn_out)
        return output
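Before training, it is worth sanity-checking the output layout, since nn.CTCLoss expects log-probabilities shaped [T, B, num_classes]. A quick check (the 37 below assumes the 36-character set sketched earlier plus one CTC blank):
python
import torch
from advanced_model import HybridCaptchaModel

model = HybridCaptchaModel(num_chars=37, pretrained=False)
model.eval()
with torch.no_grad():
    out = model(torch.randn(2, 3, 224, 224))
print(out.shape)  # expected: torch.Size([7, 2, 37]); efficientnet-b3 downsamples 224px to 7 timesteps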
5. Model Training and Optimization
Create train_advanced.py:
python
import torch
import torch.nn as nn
from torch.optim import AdamW
from torch.optim.lr_scheduler import CosineAnnealingLR
from advanced_model import HybridCaptchaModel
from data_loader import get_data_loaders, CHAR_SET, IDX_TO_CHAR
from tqdm import tqdm
import warnings
warnings.filterwarnings('ignore')
# Training configuration
class Config:
    batch_size = 32
    epochs = 50
    lr = 3e-4
    min_lr = 1e-6
    weight_decay = 1e-4
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model_save_path = 'best_model.pth'
    early_stop_patience = 5
def train_model():
    # Setup
    config = Config()
    train_loader, val_loader = get_data_loaders(config.batch_size)
    # +1 output class for the CTC blank symbol
    model = HybridCaptchaModel(len(CHAR_SET) + 1).to(config.device)
    # Optimizer and learning-rate schedule
    optimizer = AdamW(
        model.parameters(),
        lr=config.lr,
        weight_decay=config.weight_decay
    )
    scheduler = CosineAnnealingLR(
        optimizer,
        T_max=config.epochs,
        eta_min=config.min_lr
    )
    # Loss function: the blank index is len(CHAR_SET), the last class
    criterion = nn.CTCLoss(blank=len(CHAR_SET))
    # Training loop
    best_val_loss = float('inf')
    patience_counter = 0
    for epoch in range(config.epochs):
        # Training phase
        model.train()
        train_loss = 0
        progress_bar = tqdm(train_loader, desc=f'Epoch {epoch+1}/{config.epochs} [Train]')
        for images, targets, _ in progress_bar:
            images = images.to(config.device)
            targets = targets.to(config.device)
            optimizer.zero_grad()
            outputs = model(images)  # [T, B, num_chars + 1]
            # CTC input/target lengths (targets are fixed-length)
            input_lengths = torch.tensor([outputs.size(0)] * outputs.size(1), device=config.device)
            target_lengths = torch.tensor([targets.size(1)] * targets.size(0), device=config.device)
            loss = criterion(
                outputs.log_softmax(2),
                targets,
                input_lengths,
                target_lengths
            )
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
            optimizer.step()
            train_loss += loss.item()
            progress_bar.set_postfix({'loss': loss.item()})
        # Validation phase
        model.eval()
        val_loss = 0
        correct = 0
        total = 0
        with torch.no_grad():
            for images, targets, target_strs in val_loader:
                images = images.to(config.device)
                targets = targets.to(config.device)
                outputs = model(images)
                # Validation loss
                input_lengths = torch.tensor([outputs.size(0)] * outputs.size(1), device=config.device)
                target_lengths = torch.tensor([targets.size(1)] * targets.size(0), device=config.device)
                loss = criterion(
                    outputs.log_softmax(2),
                    targets,
                    input_lengths,
                    target_lengths
                )
                val_loss += loss.item()
                # Sequence accuracy via greedy CTC decoding
                _, predicted = torch.max(outputs, 2)   # [T, B]
                predicted = predicted.transpose(0, 1)  # [B, T]
                blank = len(CHAR_SET)
                for i in range(len(target_strs)):
                    seq = predicted[i].tolist()
                    # Collapse repeats, then drop blanks
                    pred_str = ''.join(IDX_TO_CHAR[p] for j, p in enumerate(seq)
                                       if p != blank and (j == 0 or p != seq[j - 1]))
                    if pred_str == target_strs[i]:
                        correct += 1
                    total += 1
        # Update the learning rate
        scheduler.step()
        # Epoch statistics
        train_loss /= len(train_loader)
        val_loss /= len(val_loader)
        val_acc = correct / total
        print(f"\nEpoch {epoch+1} Summary:")
        print(f"Train Loss: {train_loss:.4f} | Val Loss: {val_loss:.4f} | Val Acc: {val_acc:.4f}")
        # Early stopping and checkpointing
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            torch.save(model.state_dict(), config.model_save_path)
            patience_counter = 0
            print(f"Model saved, validation loss: {val_loss:.4f}")
        else:
            patience_counter += 1
            if patience_counter >= config.early_stop_patience:
                print("Early stopping triggered, ending training")
                break

if __name__ == "__main__":
    train_model()
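Once training finishes, a quick way to sanity-check the saved weights is to decode a single image outside the training loop. A minimal sketch, assuming the data_loader.py definitions above and an illustrative image path:
python
import torch
from PIL import Image
from torchvision import transforms
from advanced_model import HybridCaptchaModel
from data_loader import CHAR_SET, IDX_TO_CHAR

model = HybridCaptchaModel(len(CHAR_SET) + 1)  # +1 for the CTC blank
model.load_state_dict(torch.load('best_model.pth', map_location='cpu'))
model.eval()

transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
])

image = transform(Image.open('sample_captcha.jpg').convert('RGB')).unsqueeze(0)
with torch.no_grad():
    out = model(image)  # [T, 1, num_chars + 1]
seq = out.argmax(2).squeeze(1).tolist()
blank = len(CHAR_SET)
# Greedy CTC decode: collapse repeats, then drop blanks
text = ''.join(IDX_TO_CHAR[p] for i, p in enumerate(seq)
               if p != blank and (i == 0 or p != seq[i - 1]))
print("Prediction:", text)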
6. Deployment and API Service
Create deployment.py:
python
from flask import Flask, request, jsonify
import torch
from advanced_model import HybridCaptchaModel
from data_loader import CHAR_SET, IDX_TO_CHAR
from PIL import Image
import io
from torchvision import transforms
import time

app = Flask(__name__)

# Load the model (+1 output class for the CTC blank)
model = HybridCaptchaModel(len(CHAR_SET) + 1)
model.load_state_dict(torch.load('best_model.pth', map_location='cpu'))
model.eval()

# Image preprocessing (must match training)
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

def decode_output(output):
    """Greedy CTC decode for one image: collapse repeats, drop blanks.

    Returns the decoded string and a simple confidence proxy
    (mean per-timestep max probability).
    """
    probs = output.softmax(2).squeeze(1)  # [T, num_chars + 1]
    seq = probs.argmax(1).tolist()
    blank = len(CHAR_SET)
    text = ''.join(IDX_TO_CHAR[p] for j, p in enumerate(seq)
                   if p != blank and (j == 0 or p != seq[j - 1]))
    confidence = probs.max(1).values.mean().item()
    return text, confidence

@app.route('/predict', methods=['POST'])
def predict():
    start_time = time.time()
    if 'file' not in request.files:
        return jsonify({'error': 'No file provided'}), 400
    file = request.files['file']
    if file.filename == '':
        return jsonify({'error': 'Empty filename'}), 400
    try:
        # Read and preprocess the image
        image_bytes = file.read()
        image = Image.open(io.BytesIO(image_bytes)).convert('RGB')
        image = transform(image).unsqueeze(0)
        # Predict
        with torch.no_grad():
            output = model(image)  # [T, 1, num_chars + 1]
        captcha_text, confidence = decode_output(output)
        processing_time = time.time() - start_time
        return jsonify({
            'prediction': captcha_text,
            'confidence': round(confidence, 4),
            'processing_time': f"{processing_time:.3f}s"
        })
    except Exception as e:
        return jsonify({'error': str(e)}), 500

@app.route('/batch_predict', methods=['POST'])
def batch_predict():
    start_time = time.time()
    if 'files' not in request.files:
        return jsonify({'error': 'No files provided'}), 400
    files = request.files.getlist('files')
    if not files:
        return jsonify({'error': 'Empty file list'}), 400
    try:
        results = []
        for file in files:
            # Read and preprocess each image
            image_bytes = file.read()
            image = Image.open(io.BytesIO(image_bytes)).convert('RGB')
            image = transform(image).unsqueeze(0)
            # Predict
            with torch.no_grad():
                output = model(image)
            captcha_text, _ = decode_output(output)
            results.append({
                'filename': file.filename,
                'prediction': captcha_text
            })
        processing_time = time.time() - start_time
        return jsonify({
            'results': results,
            'total_time': f"{processing_time:.3f}s",
            'avg_time': f"{processing_time/len(files):.3f}s"
        })
    except Exception as e:
        return jsonify({'error': str(e)}), 500

if __name__ == '__main__':
    app.run(host='0.0.0.0', port=5000, threaded=True)
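With the service running, the endpoints can be exercised with curl; the file names below are illustrative:
bash
# Single image
curl -X POST -F "file=@test_captcha.jpg" http://localhost:5000/predict
# Several images in one request
curl -X POST -F "files=@a.jpg" -F "files=@b.jpg" http://localhost:5000/batch_predict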
7. Performance Optimization Tips
7.1 Model Quantization
python
# Dynamically quantize the model to speed up CPU inference
import torch
from advanced_model import HybridCaptchaModel
from data_loader import CHAR_SET

def quantize_model():
    model = HybridCaptchaModel(len(CHAR_SET) + 1)  # +1 for the CTC blank
    model.load_state_dict(torch.load('best_model.pth', map_location='cpu'))
    model.eval()
    # Dynamic quantization of the Linear and GRU layers
    quantized_model = torch.quantization.quantize_dynamic(
        model,
        {torch.nn.Linear, torch.nn.GRU},
        dtype=torch.qint8
    )
    # Save the quantized weights
    torch.save(quantized_model.state_dict(), 'quantized_model.pth')
    print("Model quantization complete!")
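To confirm that quantization actually helps on your hardware, a simple before/after latency comparison can be run. This is a rough sketch, and the numbers will vary by machine:
python
import time
import torch

def benchmark(model, runs=50):
    # Mean single-image CPU latency over several runs
    x = torch.randn(1, 3, 224, 224)
    with torch.no_grad():
        model(x)  # warm-up pass
        start = time.time()
        for _ in range(runs):
            model(x)
    return (time.time() - start) / runs

# Example (inside quantize_model): compare benchmark(model) with benchmark(quantized_model)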
7.2 Exporting to ONNX
python
import torch
from advanced_model import HybridCaptchaModel
from data_loader import CHAR_SET

def export_to_onnx():
    model = HybridCaptchaModel(len(CHAR_SET) + 1)  # +1 for the CTC blank
    model.load_state_dict(torch.load('best_model.pth', map_location='cpu'))
    model.eval()
    # efficientnet_pytorch requires the non-memory-efficient Swish for export
    model.backbone.set_swish(memory_efficient=False)
    dummy_input = torch.randn(1, 3, 224, 224)
    torch.onnx.export(
        model,
        dummy_input,
        "captcha_model.onnx",
        input_names=["input"],
        output_names=["output"],
        dynamic_axes={
            "input": {0: "batch_size"},
            # Output layout is [T, B, num_chars + 1], so batch is axis 1
            "output": {1: "batch_size"}
        },
        opset_version=13
    )
    print("ONNX model exported successfully!")
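The exported model can then be served without PyTorch via ONNX Runtime (pip install onnxruntime); a minimal check, assuming the file produced above:
python
import numpy as np
import onnxruntime as ort

session = ort.InferenceSession("captcha_model.onnx")
dummy = np.random.randn(1, 3, 224, 224).astype(np.float32)
(output,) = session.run(["output"], {"input": dummy})
print(output.shape)  # [T, 1, num_chars + 1], matching the PyTorch model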
