验证码识别系统实战:从数据生成到生产部署全流程指南
-
1. 项目介绍
本教程将完整介绍如何使用PyTorch构建一个工业级的验证码识别系统,涵盖从数据准备到生产部署的全过程。我们将实现一个高精度的验证码识别模型,并展示如何将其部署为可扩展的Web服务。
2. 环境配置
2.1 安装依赖库
bash
pip install torch==1.12.1 torchvision==0.13.1
pip install pillow==9.3.0 numpy==1.23.5 pandas==1.5.2
pip install matplotlib==3.6.2 tqdm==4.64.1
pip install flask==2.2.2 gunicorn==20.1.0
pip install albumentations==1.3.0 efficientnet_pytorch==0.7.1
pip install pytorch-lightning==1.7.7
2.2 验证码数据集生成
创建dataset_generator.py:
网站地址www.tmocr.com或联系q1092685548
python
import os
import random
import string
from PIL import Image, ImageDraw, ImageFont, ImageFilter
import numpy as np
class CaptchaGenerator:
    """Generates random captcha images (digits + uppercase letters) with
    per-character rotation, random noise dots, interference lines and a
    light smoothing blur.
    """

    def __init__(self, width=200, height=80, length=6):
        # FIX: original defined `init`; Python requires the `__init__` dunder.
        self.width = width
        self.height = height
        self.length = length
        # Character pool drawn on the captcha.
        self.char_set = string.digits + string.ascii_uppercase
        # NOTE(review): requires "arial.ttf" to be resolvable by PIL on this
        # system — confirm the font is installed in the deployment image.
        self.fonts = [
            ImageFont.truetype("arial.ttf", size)
            for size in range(32, 48, 4)
        ]

    def generate(self):
        """Return a `(PIL.Image, text)` pair for one random captcha."""
        # Blank white canvas.
        image = Image.new('RGB', (self.width, self.height), (255, 255, 255))
        draw = ImageDraw.Draw(image)
        text = ''.join(random.choices(self.char_set, k=self.length))
        # Draw each character with a random font, vertical offset and angle.
        x = 10
        for char in text:
            font = random.choice(self.fonts)
            y = random.randint(5, 20)
            angle = random.randint(-15, 15)
            # Render the character on a transparent tile, then rotate the
            # tile and alpha-paste it so the rotation keeps its background.
            char_img = Image.new('RGBA', (40, 50), (255, 255, 255, 0))
            char_draw = ImageDraw.Draw(char_img)
            char_draw.text((5, 5), char, font=font, fill=self._random_color())
            char_img = char_img.rotate(angle, expand=1)
            image.paste(char_img, (x, y), char_img)
            x += 30 + random.randint(-5, 5)
        # Add interference elements (dots, lines, blur).
        image = self._add_noise(image)
        return image, text

    def _random_color(self):
        """Random dark-ish RGB color: each channel in [0, 150]."""
        return (
            random.randint(0, 150),
            random.randint(0, 150),
            random.randint(0, 150)
        )

    def _add_noise(self, image):
        """Add noise dots, crossing lines and a light smoothing filter."""
        draw = ImageDraw.Draw(image)
        # Random noise dots. FIX: original used randint(0, width/height),
        # whose upper bound lands one pixel outside the canvas.
        for _ in range(100):
            x = random.randint(0, self.width - 1)
            y = random.randint(0, self.height - 1)
            draw.point((x, y), fill=self._random_color())
        # Interference lines spanning roughly left half to right half.
        for _ in range(5):
            x1 = random.randint(0, self.width // 2)
            y1 = random.randint(0, self.height)
            x2 = random.randint(self.width // 2, self.width)
            y2 = random.randint(0, self.height)
            draw.line((x1, y1, x2, y2), fill=self._random_color(), width=1)
        # Light blur to soften edges.
        return image.filter(ImageFilter.SMOOTH)
def generate_dataset(num_samples=50000, output_dir="dataset"):
    """Generate `num_samples` captcha PNGs under `output_dir`, split
    randomly ~8:2 into `train/` and `val/` subdirectories.

    Filenames encode the label as `<TEXT>_<index>.png`, which the
    dataset loader later parses back out.
    """
    os.makedirs(os.path.join(output_dir, "train"), exist_ok=True)
    os.makedirs(os.path.join(output_dir, "val"), exist_ok=True)
    generator = CaptchaGenerator()
    for i in range(num_samples):
        image, text = generator.generate()
        # Random 8:2 train/validation split.
        if random.random() < 0.8:
            image.save(f"{output_dir}/train/{text}_{i}.png")
        else:
            image.save(f"{output_dir}/val/{text}_{i}.png")
        # Progress report every 1000 images.
        if (i + 1) % 1000 == 0:
            print(f"已生成 {i+1}/{num_samples} 张验证码")


# FIX: original read `if name == "main"`; the standard entry-point guard
# is `if __name__ == "__main__"`.
if __name__ == "__main__":
    generate_dataset(100000)
3. 数据加载与预处理
创建data_module.py:
python
import torch
from torch.utils.data import Dataset, DataLoader
import albumentations as A
from albumentations.pytorch import ToTensorV2
import os
import numpy as np
from PIL import Image
class CaptchaDataset(Dataset):
    """Dataset of captcha PNGs whose ground-truth label is encoded in the
    filename as `<TEXT>_<index>.png`.

    NOTE(review): relies on a module-level `CHAR_SET` constant (presumably
    `string.digits + string.ascii_uppercase`, matching the generator) that
    is never defined in this file — confirm it is defined before use.
    """

    def __init__(self, data_dir, transform=None):
        # FIX: original defined `init`; Python requires the `__init__` dunder.
        self.data_dir = data_dir
        self.image_files = [f for f in os.listdir(data_dir) if f.endswith('.png')]
        self.transform = transform
        # Bidirectional char <-> index lookup tables.
        self.char_to_idx = {c: i for i, c in enumerate(CHAR_SET)}
        self.idx_to_char = {i: c for i, c in enumerate(CHAR_SET)}

    def __len__(self):
        return len(self.image_files)

    def __getitem__(self, idx):
        img_path = os.path.join(self.data_dir, self.image_files[idx])
        image = np.array(Image.open(img_path).convert('RGB'))
        # Label text is everything before the first underscore.
        label_str = self.image_files[idx].split('_')[0]
        label = [self.char_to_idx[c] for c in label_str]
        if self.transform:
            augmented = self.transform(image=image)
            image = augmented['image']
        return {
            'image': image,
            'label': torch.tensor(label),
            'label_str': label_str
        }
class CaptchaDataModule:
    """Builds train/val `CaptchaDataset`s with albumentations transforms
    and exposes the corresponding DataLoaders.
    """

    def __init__(self, batch_size=64):
        # FIX: original defined `init`; Python requires the `__init__` dunder.
        self.batch_size = batch_size
        # Training-time augmentation pipeline.
        # FIX: the original `A.Perspective(scale=(0.05, 0.1),` was missing
        # its closing parenthesis — a syntax error.
        self.train_transform = A.Compose([
            A.Rotate(limit=10, p=0.5),
            A.Perspective(scale=(0.05, 0.1)),
            A.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
            A.GaussianBlur(blur_limit=(3, 7)),
            A.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
            ToTensorV2()
        ])
        # Validation: normalization only, no augmentation.
        self.val_transform = A.Compose([
            A.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
            ToTensorV2()
        ])

    def setup(self):
        """Instantiate the train and validation datasets."""
        self.train_dataset = CaptchaDataset(
            os.path.join("dataset", "train"),
            self.train_transform
        )
        self.val_dataset = CaptchaDataset(
            os.path.join("dataset", "val"),
            self.val_transform
        )

    def train_dataloader(self):
        return DataLoader(
            self.train_dataset,
            batch_size=self.batch_size,
            shuffle=True,
            num_workers=4,
            pin_memory=True
        )

    def val_dataloader(self):
        return DataLoader(
            self.val_dataset,
            batch_size=self.batch_size,
            shuffle=False,
            num_workers=4,
            pin_memory=True
        )
4. 模型设计与实现
创建captcha_model.py:
python
import torch
import torch.nn as nn
import torch.nn.functional as F
from efficientnet_pytorch import EfficientNet
import pytorch_lightning as pl
class CaptchaModel(pl.LightningModule):
    """CRNN-style captcha recognizer: EfficientNet-B3 features ->
    channel attention -> bidirectional GRU over the width axis ->
    per-timestep classifier, trained with CTC loss.
    """

    def __init__(self, num_chars):
        # FIX: original used `init` / `super().init()`; Python requires the
        # `__init__` dunders.
        super().__init__()
        self.save_hyperparameters()
        # EfficientNet-B3 backbone as feature extractor (1536 channels).
        self.backbone = EfficientNet.from_pretrained('efficientnet-b3')
        self.backbone._fc = nn.Identity()
        # Squeeze-and-excitation style channel attention.
        self.attention = nn.Sequential(
            nn.AdaptiveAvgPool2d(1),
            nn.Flatten(),
            nn.Linear(1536, 1536 // 8),
            nn.ReLU(),
            nn.Linear(1536 // 8, 1536),
            nn.Sigmoid()
        )
        # Bidirectional GRU over the width (time) dimension.
        self.gru = nn.GRU(
            input_size=1536,
            hidden_size=512,
            num_layers=3,
            bidirectional=True,
            dropout=0.3
        )
        # FIX: CTC needs one extra class for the blank symbol. The original
        # used nn.Linear(1024, num_chars) together with blank=num_chars,
        # which is an out-of-range class index; emit num_chars + 1 logits
        # with the blank as the last index.
        self.fc = nn.Linear(1024, num_chars + 1)
        self.criterion = nn.CTCLoss(blank=num_chars)

    def forward(self, x):
        """Return per-timestep logits of shape [W, B, num_chars + 1]."""
        features = self.backbone.extract_features(x)  # [B, 1536, H, W]
        # Apply channel attention weights.
        attn_weights = self.attention(features).view(-1, 1536, 1, 1)
        features = features * attn_weights
        # FIX: collapse the height axis so the GRU sees exactly 1536
        # features per timestep. The original reshaped to C*H, which only
        # matches input_size=1536 when H == 1 — not true for 80-px-high
        # inputs after EfficientNet's 32x downsampling.
        features = F.adaptive_avg_pool2d(features, (1, features.size(3)))
        B, C, H, W = features.size()                # H == 1 after pooling
        features = features.permute(0, 3, 1, 2)     # [B, W, C, H]
        features = features.reshape(B, -1, C * H)   # [B, W, C]
        # GRU expects time-major input [T, B, F].
        features = features.permute(1, 0, 2)        # [W, B, C]
        rnn_out, _ = self.gru(features)
        output = self.fc(rnn_out)                   # [W, B, num_chars + 1]
        return output

    def _ctc_loss(self, outputs, labels):
        """CTC loss for [W, B, K] logits and [B, L] integer label batch.

        All captchas in a batch share the same fixed length, so the
        per-sample lengths are constant fills.
        """
        input_lengths = torch.full(
            size=(outputs.size(1),),
            fill_value=outputs.size(0),
            device=self.device
        )
        target_lengths = torch.full(
            size=(labels.size(0),),
            fill_value=labels.size(1),
            device=self.device
        )
        return self.criterion(
            outputs.log_softmax(2),
            labels,
            input_lengths,
            target_lengths
        )

    def training_step(self, batch, batch_idx):
        images, labels = batch['image'], batch['label']
        outputs = self(images)
        loss = self._ctc_loss(outputs, labels)
        acc = self._compute_accuracy(outputs, labels)
        self.log('train_loss', loss, prog_bar=True)
        self.log('train_acc', acc, prog_bar=True)
        return loss

    def validation_step(self, batch, batch_idx):
        images, labels = batch['image'], batch['label']
        outputs = self(images)
        loss = self._ctc_loss(outputs, labels)
        acc = self._compute_accuracy(outputs, labels)
        self.log('val_loss', loss, prog_bar=True)
        self.log('val_acc', acc, prog_bar=True)
        return loss

    def _compute_accuracy(self, outputs, labels):
        """Exact-match accuracy using greedy CTC decoding.

        FIX: the original indexed `self.hparams.idx_to_char` and
        `self.hparams.char_set`, which were never saved as hyperparameters
        (only `num_chars` is). Compare integer sequences directly instead,
        using standard greedy CTC decoding: collapse consecutive repeats,
        then drop blanks.
        """
        _, preds = torch.max(outputs, 2)
        preds = preds.permute(1, 0)  # [B, W]
        blank = self.hparams.num_chars
        correct = 0
        for i in range(labels.size(0)):
            decoded = []
            prev = None
            for p in preds[i].tolist():
                if p != prev and p != blank:
                    decoded.append(p)
                prev = p
            if decoded == labels[i].tolist():
                correct += 1
        return correct / labels.size(0)

    def configure_optimizers(self):
        """AdamW with ReduceLROnPlateau driven by validation accuracy."""
        optimizer = torch.optim.AdamW(
            self.parameters(),
            lr=3e-4,
            weight_decay=1e-5
        )
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
            optimizer,
            mode='max',
            factor=0.5,
            patience=3,
            verbose=True
        )
        return {
            'optimizer': optimizer,
            'lr_scheduler': {
                'scheduler': scheduler,
                'monitor': 'val_acc'
            }
        }
5. 模型训练与评估
创建train.py:
python
import pytorch_lightning as pl
from pytorch_lightning.callbacks import ModelCheckpoint, EarlyStopping
from captcha_model import CaptchaModel
from data_module import CaptchaDataModule
def train():
    """Train the captcha model with checkpointing and early stopping,
    both monitoring validation accuracy.
    """
    # FIX: `torch` is used below but train.py never imported it;
    # import locally so this file runs standalone.
    import torch

    # Data module.
    data_module = CaptchaDataModule(batch_size=64)
    data_module.setup()
    # Model.
    # NOTE(review): CHAR_SET is not defined in this file — it must be
    # imported or defined at module level (same set used for generation).
    model = CaptchaModel(num_chars=len(CHAR_SET))
    # Keep the 3 best checkpoints by validation accuracy.
    checkpoint_callback = ModelCheckpoint(
        monitor='val_acc',
        mode='max',
        save_top_k=3,
        filename='captcha-{epoch:02d}-{val_acc:.2f}'
    )
    # Stop when val_acc has not improved for 5 epochs.
    early_stop_callback = EarlyStopping(
        monitor='val_acc',
        patience=5,
        mode='max'
    )
    # Trainer: single device, mixed precision.
    trainer = pl.Trainer(
        max_epochs=30,
        callbacks=[checkpoint_callback, early_stop_callback],
        accelerator='gpu' if torch.cuda.is_available() else 'cpu',
        devices=1,
        log_every_n_steps=10,
        precision=16
    )
    # Start training.
    trainer.fit(model, datamodule=data_module)


# FIX: original read `if name == "main"`; the standard entry-point guard
# is `if __name__ == "__main__"`.
if __name__ == "__main__":
    train()
6. 模型部署与API服务
创建app.py:
python
from flask import Flask, request, jsonify
import torch
from captcha_model import CaptchaModel
import numpy as np
from PIL import Image
import io
import onnxruntime as ort
# FIX: original passed bare `name`; Flask needs the module's `__name__`.
app = Flask(__name__)

# Load the ONNX model once at startup (shared by all requests).
ort_session = ort.InferenceSession("captcha_model.onnx")


def preprocess_image(image_bytes):
    """Decode raw image bytes into a normalized NCHW float numpy array
    (batch of 1), matching the validation-time preprocessing used in
    training.
    """
    # FIX: app.py never imported albumentations; bring it in locally.
    import albumentations as A
    from albumentations.pytorch import ToTensorV2
    image = Image.open(io.BytesIO(image_bytes)).convert('RGB')
    image = np.array(image)
    transform = A.Compose([
        A.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
        ToTensorV2()
    ])
    return transform(image=image)['image'].unsqueeze(0).numpy()


@app.route('/predict', methods=['POST'])
def predict():
    """Accept an uploaded image under form field 'file' and return the
    predicted captcha text as JSON.
    """
    if 'file' not in request.files:
        return jsonify({'error': 'No file provided'}), 400
    file = request.files['file']
    if file.filename == '':
        return jsonify({'error': 'Empty filename'}), 400
    try:
        # Preprocess and run ONNX inference.
        image_numpy = preprocess_image(file.read())
        ort_inputs = {ort_session.get_inputs()[0].name: image_numpy}
        ort_outs = ort_session.run(None, ort_inputs)
        outputs = torch.tensor(ort_outs[0])
        # Greedy decode: argmax per timestep, drop indices outside the
        # character set (the CTC blank).
        # NOTE(review): IDX_TO_CHAR and CHAR_SET are not defined in this
        # file — they must be imported/defined at module level, matching
        # the training-time character set.
        _, predicted = torch.max(outputs, 2)
        predicted = predicted.squeeze(0)
        captcha_text = ''.join([IDX_TO_CHAR[p.item()]
                                for p in predicted if p.item() < len(CHAR_SET)])
        return jsonify({'prediction': captcha_text})
    except Exception as e:
        # Boundary handler: surface the error to the API caller.
        return jsonify({'error': str(e)}), 500


# FIX: original read `if name == 'main'`; the standard entry-point guard
# is `if __name__ == '__main__'`.
if __name__ == '__main__':
    app.run(host='0.0.0.0', port=5000)
7. 性能优化与生产部署
7.1 模型导出为ONNX格式
python
def export_to_onnx():
    """Export the best checkpoint to captcha_model.onnx for serving."""
    # Load the trained model.
    model = CaptchaModel.load_from_checkpoint("best_model.ckpt")
    model.eval()
    # Dummy input for tracing.
    # FIX: the generator produces 200x80 (WxH) images, so the traced
    # input must be (1, 3, 80, 200) — the original used width 220.
    dummy_input = torch.randn(1, 3, 80, 200)
    # Export with a dynamic batch dimension.
    torch.onnx.export(
        model,
        dummy_input,
        "captcha_model.onnx",
        input_names=["input"],
        output_names=["output"],
        dynamic_axes={
            "input": {0: "batch_size"},
            "output": {0: "batch_size"}
        },
        opset_version=13
    )
7.2 Docker容器化部署
创建Dockerfile:
dockerfile
FROM python:3.8-slim
WORKDIR /app
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt
COPY . .
EXPOSE 5000
CMD ["gunicorn", "--bind", "0.0.0.0:5000", "--workers", "4", "app:app"]
构建并运行容器:
bash
docker build -t captcha-service .
docker run -p 5000:5000 captcha-service

浙公网安备 33010602011771号