验证码识别系统实战:从数据生成到生产部署全流程指南

  1. 项目介绍
    本教程将完整介绍如何使用PyTorch构建一个工业级的验证码识别系统,涵盖从数据准备到生产部署的全过程。我们将实现一个高精度的验证码识别模型,并展示如何将其部署为可扩展的Web服务。

  2. 环境配置
    2.1 安装依赖库
    bash
    pip install torch==1.12.1 torchvision==0.13.1
    pip install pillow==9.3.0 numpy==1.23.5 pandas==1.5.2
    pip install matplotlib==3.6.2 tqdm==4.64.1
    pip install flask==2.2.2 gunicorn==20.1.0
    pip install albumentations==1.3.0 efficientnet_pytorch==0.7.1
    pip install pytorch-lightning==1.7.7
    2.2 验证码数据集生成
    创建dataset_generator.py:
    网站地址www.tmocr.com或联系q1092685548
    python
    import os
    import random
    import string
    from PIL import Image, ImageDraw, ImageFont, ImageFilter
    import numpy as np

class CaptchaGenerator:
    """Render random text CAPTCHA images with rotation, noise and blur.

    Every image shows ``length`` characters drawn from the digits and the
    upper-case ASCII letters.
    """

    def __init__(self, width=200, height=80, length=6, font_path="arial.ttf"):
        """Configure canvas size, text length and the candidate fonts.

        Args:
            width: Image width in pixels.
            height: Image height in pixels.
            length: Number of characters per CAPTCHA.
            font_path: TrueType font file handed to ``ImageFont.truetype``.
        """
        self.width = width
        self.height = height
        self.length = length
        self.char_set = string.digits + string.ascii_uppercase
        # One font object per size (32, 36, 40, 44) so glyph size varies.
        self.fonts = [
            ImageFont.truetype(font_path, size)
            for size in range(32, 48, 4)
        ]

    def generate(self):
        """Create one CAPTCHA.

        Returns:
            An ``(image, text)`` tuple: the rendered RGB image and the string
            it depicts.
        """
        # Blank white canvas.
        image = Image.new('RGB', (self.width, self.height), (255, 255, 255))
        text = ''.join(random.choices(self.char_set, k=self.length))

        # Draw the characters left to right with random font, offset and tilt.
        x = 10
        for char in text:
            font = random.choice(self.fonts)
            y = random.randint(5, 20)
            angle = random.randint(-15, 15)

            # Each glyph goes on its own transparent tile so that it can be
            # rotated independently of its neighbours.
            char_img = Image.new('RGBA', (40, 50), (255, 255, 255, 0))
            char_draw = ImageDraw.Draw(char_img)
            char_draw.text((5, 5), char, font=font, fill=self._random_color())
            char_img = char_img.rotate(angle, expand=1)

            # The tile doubles as its own alpha mask when pasted.
            image.paste(char_img, (x, y), char_img)
            x += 30 + random.randint(-5, 5)

        # Add the distracting elements (dots, lines, blur).
        image = self._add_noise(image)
        return image, text

    def _random_color(self):
        """Return a random dark-ish RGB tuple (each channel in 0..150)."""
        return (
            random.randint(0, 150),
            random.randint(0, 150),
            random.randint(0, 150)
        )

    def _add_noise(self, image):
        """Draw random dots and lines on ``image`` and return a smoothed copy.

        ``random.randint`` is inclusive on both ends, so the upper bounds are
        ``width - 1`` / ``height - 1`` to keep every coordinate on the canvas.
        """
        draw = ImageDraw.Draw(image)
        max_x = self.width - 1
        max_y = self.height - 1

        # Random noise dots.
        for _ in range(100):
            x = random.randint(0, max_x)
            y = random.randint(0, max_y)
            draw.point((x, y), fill=self._random_color())

        # Interference lines running from the left half to the right half.
        for _ in range(5):
            x1 = random.randint(0, self.width // 2)
            y1 = random.randint(0, max_y)
            x2 = random.randint(self.width // 2, max_x)
            y2 = random.randint(0, max_y)
            draw.line((x1, y1, x2, y2), fill=self._random_color(), width=1)

        # Light blur.
        return image.filter(ImageFilter.SMOOTH)

def generate_dataset(num_samples=50000, output_dir="dataset", train_ratio=0.8):
    """Generate CAPTCHA images and split them randomly into train/val folders.

    Files are written as ``<output_dir>/<train|val>/<text>_<index>.png``. The
    index keeps names unique when the same text is drawn twice, and the
    underscore lets a loader recover the label with ``name.split('_')[0]``.

    Args:
        num_samples: Total number of images to generate.
        output_dir: Root directory; ``train`` and ``val`` are created inside.
        train_ratio: Probability that a sample lands in the training split.
    """
    train_dir = os.path.join(output_dir, "train")
    val_dir = os.path.join(output_dir, "val")
    os.makedirs(train_dir, exist_ok=True)
    os.makedirs(val_dir, exist_ok=True)

    generator = CaptchaGenerator()
    for i in range(num_samples):
        image, text = generator.generate()

        # Random split, 8:2 between training and validation by default.
        target_dir = train_dir if random.random() < train_ratio else val_dir
        image.save(os.path.join(target_dir, f"{text}_{i}.png"))

        if (i + 1) % 1000 == 0:
            print(f"已生成 {i+1}/{num_samples} 张验证码")

# Only generate the dataset when this file is executed as a script.
if __name__ == "__main__":
    generate_dataset(100000)
3. 数据加载与预处理
创建data_module.py:

python
import torch
from torch.utils.data import Dataset, DataLoader
import albumentations as A
from albumentations.pytorch import ToTensorV2
import os
import numpy as np
from PIL import Image

class CaptchaDataset(Dataset):
    """CAPTCHA images on disk whose label is encoded in the file name.

    File names are expected to look like ``<label>_<anything>.png``; the text
    before the first underscore is the label.
    """

    def __init__(self, data_dir, transform=None, char_set=None):
        """Index the PNG files in ``data_dir`` and build the character maps.

        Args:
            data_dir: Directory containing the ``.png`` files.
            transform: Optional callable invoked as ``transform(image=array)``
                that returns a mapping with an ``'image'`` entry.
            char_set: Alphabet of the labels. Defaults to digits followed by
                upper-case ASCII letters.
        """
        import string  # local import: not part of this script's import block

        if char_set is None:
            char_set = string.digits + string.ascii_uppercase

        self.data_dir = data_dir
        # Sorted so that an index always refers to the same file;
        # os.listdir() makes no ordering promise.
        self.image_files = sorted(
            f for f in os.listdir(data_dir) if f.endswith('.png')
        )
        self.transform = transform
        self.char_set = char_set
        self.char_to_idx = {c: i for i, c in enumerate(char_set)}
        self.idx_to_char = {i: c for i, c in enumerate(char_set)}

    def __len__(self):
        """Return the number of indexed image files."""
        return len(self.image_files)

    def __getitem__(self, idx):
        """Load sample ``idx``.

        Returns:
            A dict with ``'image'`` (the transformed image, or the raw RGB
            array when no transform is set), ``'label'`` (tensor of character
            indices) and ``'label_str'`` (the label text).

        Raises:
            KeyError: If the label contains a character outside ``char_set``.
        """
        file_name = self.image_files[idx]
        img_path = os.path.join(self.data_dir, file_name)
        image = np.array(Image.open(img_path).convert('RGB'))

        label_str = file_name.split('_')[0]
        label = [self.char_to_idx[c] for c in label_str]

        if self.transform:
            augmented = self.transform(image=image)
            image = augmented['image']

        return {
            'image': image,
            'label': torch.tensor(label, dtype=torch.long),
            'label_str': label_str
        }

class CaptchaDataModule:
    """Bundle the train/val datasets, their transforms and their loaders.

    This is a plain class rather than a Lightning data module, so callers
    obtain the loaders through ``train_dataloader()`` / ``val_dataloader()``
    after calling ``setup()``.
    """

    def __init__(self, batch_size=64, data_dir="dataset", num_workers=4):
        """Build the augmentation pipelines.

        Args:
            batch_size: Batch size used by both loaders.
            data_dir: Root folder containing the ``train`` and ``val`` splits.
            num_workers: Worker processes per loader.
        """
        self.batch_size = batch_size
        self.data_dir = data_dir
        self.num_workers = num_workers

        # Training-set augmentation followed by normalisation to [-1, 1].
        self.train_transform = A.Compose([
            A.Rotate(limit=10, p=0.5),
            A.Perspective(scale=(0.05, 0.1)),
            A.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
            A.GaussianBlur(blur_limit=(3, 7)),
            A.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
            ToTensorV2()
        ])

        # Validation set: normalisation only, no augmentation.
        self.val_transform = A.Compose([
            A.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
            ToTensorV2()
        ])

    def setup(self, stage=None):
        """Create the train and validation datasets.

        Args:
            stage: Unused; accepted so the method can also be called with a
                stage argument.
        """
        self.train_dataset = CaptchaDataset(
            os.path.join(self.data_dir, "train"),
            self.train_transform
        )

        self.val_dataset = CaptchaDataset(
            os.path.join(self.data_dir, "val"),
            self.val_transform
        )

    def train_dataloader(self):
        """Return the shuffled training loader."""
        return DataLoader(
            self.train_dataset,
            batch_size=self.batch_size,
            shuffle=True,
            num_workers=self.num_workers,
            pin_memory=True
        )

    def val_dataloader(self):
        """Return the validation loader (fixed order)."""
        return DataLoader(
            self.val_dataset,
            batch_size=self.batch_size,
            shuffle=False,
            num_workers=self.num_workers,
            pin_memory=True
        )
4. 模型设计与实现
创建captcha_model.py:

python
import torch
import torch.nn as nn
import torch.nn.functional as F
from efficientnet_pytorch import EfficientNet
import pytorch_lightning as pl

class CaptchaModel(pl.LightningModule):
    """EfficientNet + bidirectional GRU CAPTCHA recogniser trained with CTC.

    The network emits ``num_chars + 1`` scores per horizontal position: one
    per character plus the CTC blank, which uses index ``num_chars``.
    """

    # Channel count of the backbone feature map that the attention and GRU
    # layers are sized for.
    feature_dim = 1536

    def __init__(self, num_chars):
        """Build the network.

        Args:
            num_chars: Size of the label alphabet (blank excluded).
        """
        super().__init__()
        self.save_hyperparameters()

        # EfficientNet as the feature extractor; its classifier head is unused.
        self.backbone = EfficientNet.from_pretrained('efficientnet-b3')
        self.backbone._fc = nn.Identity()

        # Channel attention: global pool -> bottleneck MLP -> sigmoid gate.
        self.attention = nn.Sequential(
            nn.AdaptiveAvgPool2d(1),
            nn.Flatten(),
            nn.Linear(self.feature_dim, self.feature_dim // 8),
            nn.ReLU(),
            nn.Linear(self.feature_dim // 8, self.feature_dim),
            nn.Sigmoid()
        )

        # Bidirectional GRU over the horizontal axis.
        self.gru = nn.GRU(
            input_size=self.feature_dim,
            hidden_size=512,
            num_layers=3,
            bidirectional=True,
            dropout=0.3
        )

        # Output layer: 2 * hidden_size inputs (both directions), and one
        # extra class so that the blank index ``num_chars`` is valid.
        self.fc = nn.Linear(1024, num_chars + 1)

        # zero_infinity guards against batches where the sequence is too
        # short for a label, which would otherwise yield an infinite loss.
        self.criterion = nn.CTCLoss(blank=num_chars, zero_infinity=True)

    def forward(self, x):
        """Return per-position class scores shaped ``[W, B, num_chars + 1]``."""
        # Extract features: [B, C, H, W].
        features = self.backbone.extract_features(x)

        # Apply the channel gate.
        gate = self.attention(features).view(-1, self.feature_dim, 1, 1)
        features = features * gate

        # Average over height so each horizontal position becomes one time
        # step with exactly C features, matching the GRU's input_size.
        features = features.mean(dim=2)          # [B, C, W]
        features = features.permute(2, 0, 1)     # [W, B, C]

        rnn_out, _ = self.gru(features)
        return self.fc(rnn_out)

    def _shared_step(self, batch):
        """Compute CTC loss and exact-match accuracy for one batch."""
        images = batch['image']
        labels = batch['label']

        outputs = self(images)
        seq_len, batch_size = outputs.size(0), outputs.size(1)

        # Every sample uses the full output sequence and the full label row.
        input_lengths = torch.full(
            size=(batch_size,),
            fill_value=seq_len,
            dtype=torch.long,
            device=self.device
        )
        target_lengths = torch.full(
            size=(labels.size(0),),
            fill_value=labels.size(1),
            dtype=torch.long,
            device=self.device
        )

        loss = self.criterion(
            outputs.log_softmax(2),
            labels,
            input_lengths,
            target_lengths
        )
        acc = self._compute_accuracy(outputs, labels)
        return loss, acc

    def training_step(self, batch, batch_idx):
        """Log and return the training loss for one batch."""
        loss, acc = self._shared_step(batch)
        self.log('train_loss', loss, prog_bar=True)
        self.log('train_acc', acc, prog_bar=True)
        return loss

    def validation_step(self, batch, batch_idx):
        """Log and return the validation loss for one batch."""
        loss, acc = self._shared_step(batch)
        self.log('val_loss', loss, prog_bar=True)
        self.log('val_acc', acc, prog_bar=True)
        return loss

    @staticmethod
    def _greedy_decode(outputs, blank):
        """Greedy CTC decode: argmax, merge repeats, then drop blanks.

        Args:
            outputs: Scores shaped ``[T, B, C]``.
            blank: Index of the blank class.

        Returns:
            One list of class indices per batch element.
        """
        best_paths = outputs.argmax(dim=2).permute(1, 0).tolist()
        decoded = []
        for path in best_paths:
            sequence = []
            previous = None
            for index in path:
                if index != previous and index != blank:
                    sequence.append(index)
                previous = index
            decoded.append(sequence)
        return decoded

    def _compute_accuracy(self, outputs, labels):
        """Return the fraction of samples whose decoded sequence is exact.

        Index sequences are compared directly, so no character table is
        needed.
        """
        decoded = self._greedy_decode(outputs, blank=self.hparams.num_chars)
        targets = labels.tolist()
        if not targets:
            return 0.0
        correct = sum(1 for pred, true in zip(decoded, targets) if pred == true)
        return correct / len(targets)

    def configure_optimizers(self):
        """AdamW plus a plateau scheduler that watches ``val_acc``."""
        optimizer = torch.optim.AdamW(
            self.parameters(),
            lr=3e-4,
            weight_decay=1e-5
        )

        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
            optimizer,
            mode='max',
            factor=0.5,
            patience=3,
            verbose=True
        )

        return {
            'optimizer': optimizer,
            'lr_scheduler': {
                'scheduler': scheduler,
                'monitor': 'val_acc'
            }
        }
5. 模型训练与评估
创建train.py:

python
import pytorch_lightning as pl
from pytorch_lightning.callbacks import ModelCheckpoint, EarlyStopping
from captcha_model import CaptchaModel
from data_module import CaptchaDataModule

def train():
    """Train the CAPTCHA model, checkpointing on validation accuracy."""
    import torch  # local import: this script's import block does not include it

    # Data module.
    data_module = CaptchaDataModule(batch_size=64)
    data_module.setup()

    # Model: size the output from the dataset's own character table.
    num_chars = len(data_module.train_dataset.char_to_idx)
    model = CaptchaModel(num_chars=num_chars)

    # Training callbacks.
    checkpoint_callback = ModelCheckpoint(
        monitor='val_acc',
        mode='max',
        save_top_k=3,
        filename='captcha-{epoch:02d}-{val_acc:.2f}'
    )

    early_stop_callback = EarlyStopping(
        monitor='val_acc',
        patience=5,
        mode='max'
    )

    # Trainer: 16-bit mixed precision only when a GPU is actually used.
    use_gpu = torch.cuda.is_available()
    trainer = pl.Trainer(
        max_epochs=30,
        callbacks=[checkpoint_callback, early_stop_callback],
        accelerator='gpu' if use_gpu else 'cpu',
        devices=1,
        log_every_n_steps=10,
        precision=16 if use_gpu else 32
    )

    # Start training. The loaders are passed explicitly because the data
    # module is a plain object, not a LightningDataModule.
    trainer.fit(
        model,
        train_dataloaders=data_module.train_dataloader(),
        val_dataloaders=data_module.val_dataloader()
    )

# Only start training when this file is executed as a script.
if __name__ == "__main__":
    train()
6. 模型部署与API服务
创建app.py:

python
from flask import Flask, request, jsonify
import torch
from captcha_model import CaptchaModel
import numpy as np
from PIL import Image
import io
import onnxruntime as ort

app = Flask(__name__)

# Load the ONNX model once at import time; every request reuses this session.
ort_session = ort.InferenceSession("captcha_model.onnx")

def preprocess_image(image_bytes):
    """Decode image bytes into a normalised float32 batch of one.

    Pixels are scaled to [0, 1] and then normalised with mean 0.5 and std 0.5
    per channel, i.e. mapped to [-1, 1].

    Args:
        image_bytes: Raw bytes of an encoded image file.

    Returns:
        A ``numpy.ndarray`` of dtype float32 shaped ``(1, 3, H, W)``.
    """
    image = Image.open(io.BytesIO(image_bytes)).convert('RGB')
    pixels = np.asarray(image, dtype=np.float32) / 255.0   # HWC in [0, 1]
    pixels = (pixels - 0.5) / 0.5                          # HWC in [-1, 1]
    # HWC -> CHW, then add the leading batch axis.
    batch = pixels.transpose(2, 0, 1)[np.newaxis, ...]
    return np.ascontiguousarray(batch, dtype=np.float32)

# Alphabet of the labels (digits, then upper-case letters). The class index
# equal to its length is treated as the CTC blank.
_CAPTCHA_CHARSET = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"


def _ctc_greedy_decode(logits, charset):
    """Greedy CTC decode for a single image.

    Args:
        logits: Array shaped ``[T, 1, C]`` (time-major, batch of one).
        charset: String whose position ``i`` is the character of class ``i``;
            any class index ``>= len(charset)`` is dropped as blank.

    Returns:
        The decoded text with consecutive repeats merged and blanks removed.
    """
    blank = len(charset)
    best_path = np.asarray(logits).argmax(axis=2)[:, 0]
    chars = []
    previous = None
    for index in best_path.tolist():
        if index != previous and index < blank:
            chars.append(charset[index])
        previous = index
    return ''.join(chars)


@app.route('/predict', methods=['POST'])
def predict():
    """Recognise the CAPTCHA uploaded in the ``file`` form field.

    Returns:
        ``{'prediction': text}`` on success; ``{'error': ...}`` with status
        400 for a missing, unnamed or undecodable upload, and 500 when
        inference fails.
    """
    if 'file' not in request.files:
        return jsonify({'error': 'No file provided'}), 400

    file = request.files['file']
    if file.filename == '':
        return jsonify({'error': 'Empty filename'}), 400

    # Preprocess the image; an undecodable upload is a client error.
    try:
        image_numpy = preprocess_image(file.read())
    except (OSError, ValueError):
        return jsonify({'error': 'Invalid image file'}), 400

    # ONNX inference. Details are logged server-side rather than echoed back
    # to the client.
    try:
        ort_inputs = {ort_session.get_inputs()[0].name: image_numpy}
        ort_outs = ort_session.run(None, ort_inputs)
    except Exception:
        app.logger.exception("CAPTCHA inference failed")
        return jsonify({'error': 'Inference failed'}), 500

    # Decode the prediction.
    captcha_text = _ctc_greedy_decode(ort_outs[0], _CAPTCHA_CHARSET)
    return jsonify({'prediction': captcha_text})

# Development server entry point, used only when run as a script.
if __name__ == '__main__':
    app.run(host='0.0.0.0', port=5000)
7. 性能优化与生产部署
7.1 模型导出为ONNX格式
python
def export_to_onnx(checkpoint_path="best_model.ckpt",
                   onnx_path="captcha_model.onnx"):
    """Export a trained checkpoint to ONNX.

    Args:
        checkpoint_path: Lightning checkpoint to load.
        onnx_path: Destination ``.onnx`` file.
    """
    # Load the trained model.
    model = CaptchaModel.load_from_checkpoint(checkpoint_path)
    model.eval()

    # Dummy input, NCHW: one 80x200 (height x width) RGB image, the
    # generator's default canvas size.
    dummy_input = torch.randn(1, 3, 80, 200)

    # Export. The model output is time-major ([W, B, classes]), so its batch
    # dimension is axis 1, not axis 0.
    torch.onnx.export(
        model,
        dummy_input,
        onnx_path,
        input_names=["input"],
        output_names=["output"],
        dynamic_axes={
            "input": {0: "batch_size"},
            "output": {1: "batch_size"}
        },
        opset_version=13
    )
7.2 Docker容器化部署
创建Dockerfile:

dockerfile
# Base image: slim Python 3.8.
FROM python:3.8-slim

# Later relative paths resolve against /app inside the image.
WORKDIR /app

# Install dependencies before copying the sources, so this layer can be
# reused from cache while requirements.txt is unchanged.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy the rest of the build context into /app.
COPY . .

# Port the service listens on.
EXPOSE 5000

# Serve the `app` object from app.py with 4 gunicorn workers.
CMD ["gunicorn", "--bind", "0.0.0.0:5000", "--workers", "4", "app:app"]
构建并运行容器:

bash
# Build the image from the current directory and tag it captcha-service.
docker build -t captcha-service .
# Run it, publishing container port 5000 on host port 5000.
docker run -p 5000:5000 captcha-service

posted @ 2025-05-10 21:37  tmcor  阅读(134)  评论(0)    收藏  举报