CAPTCHA Recognition in Practice: An End-to-End Guide from Data Generation to Production Deployment

  1. Project Overview
    This tutorial walks through building an industrial-grade CAPTCHA recognition system with PyTorch, covering the entire pipeline from data preparation to production deployment. We will implement a high-accuracy recognition model and show how to serve it as a scalable web service.

  2. Environment Setup
    2.1 Installing Dependencies
```bash
pip install torch==1.12.1 torchvision==0.13.1
pip install pillow==9.3.0 numpy==1.23.5 pandas==1.5.2
pip install matplotlib==3.6.2 tqdm==4.64.1
pip install flask==2.2.2 gunicorn==20.1.0
pip install albumentations==1.3.0 efficientnet_pytorch==0.7.1
pip install pytorch-lightning==1.7.7
```
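    After installing, a quick sanity check (a minimal sketch, not part of the project files) confirms that the key libraries import correctly and whether a GPU is visible:
```python
# check_env.py - optional environment sanity check
import torch
import torchvision
import albumentations
import pytorch_lightning as pl

print("torch:", torch.__version__)
print("torchvision:", torchvision.__version__)
print("albumentations:", albumentations.__version__)
print("pytorch-lightning:", pl.__version__)
print("CUDA available:", torch.cuda.is_available())
```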
    2.2 Generating the CAPTCHA Dataset
    Create dataset_generator.py:
```python
import os
import random
import string
from PIL import Image, ImageDraw, ImageFont, ImageFilter
import numpy as np

class CaptchaGenerator:
    def __init__(self, width=200, height=80, length=6):
        self.width = width
        self.height = height
        self.length = length
        self.char_set = string.digits + string.ascii_uppercase
        self.fonts = [
            ImageFont.truetype("arial.ttf", size)
            for size in range(32, 48, 4)
        ]

    def generate(self):
        # Create a blank image
        image = Image.new('RGB', (self.width, self.height), (255, 255, 255))
        draw = ImageDraw.Draw(image)
        text = ''.join(random.choices(self.char_set, k=self.length))

        # Draw each character with a random size, offset and rotation
        x = 10
        for char in text:
            font = random.choice(self.fonts)
            y = random.randint(5, 20)
            angle = random.randint(-15, 15)

            # Render the character on a transparent patch, rotate it, then paste it
            char_img = Image.new('RGBA', (40, 50), (255, 255, 255, 0))
            char_draw = ImageDraw.Draw(char_img)
            char_draw.text((5, 5), char, font=font, fill=self._random_color())
            char_img = char_img.rotate(angle, expand=1)

            image.paste(char_img, (x, y), char_img)
            x += 30 + random.randint(-5, 5)

        # Add noise and distractor lines
        image = self._add_noise(image)
        return image, text

    def _random_color(self):
        return (
            random.randint(0, 150),
            random.randint(0, 150),
            random.randint(0, 150)
        )

    def _add_noise(self, image):
        draw = ImageDraw.Draw(image)

        # Random noise points
        for _ in range(100):
            x = random.randint(0, self.width)
            y = random.randint(0, self.height)
            draw.point((x, y), fill=self._random_color())

        # Distractor lines
        for _ in range(5):
            x1 = random.randint(0, self.width // 2)
            y1 = random.randint(0, self.height)
            x2 = random.randint(self.width // 2, self.width)
            y2 = random.randint(0, self.height)
            draw.line((x1, y1, x2, y2), fill=self._random_color(), width=1)

        # Light smoothing
        return image.filter(ImageFilter.SMOOTH)

def generate_dataset(num_samples=50000, output_dir="dataset"):
    os.makedirs(os.path.join(output_dir, "train"), exist_ok=True)
    os.makedirs(os.path.join(output_dir, "val"), exist_ok=True)

    generator = CaptchaGenerator()
    for i in range(num_samples):
        image, text = generator.generate()

        # 80/20 split between training and validation sets
        if random.random() < 0.8:
            image.save(f"{output_dir}/train/{text}_{i}.png")
        else:
            image.save(f"{output_dir}/val/{text}_{i}.png")

        if (i + 1) % 1000 == 0:
            print(f"Generated {i + 1}/{num_samples} CAPTCHA images")

if __name__ == "__main__":
    generate_dataset(100000)
```
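    Before generating the full 100,000-image dataset, it helps to eyeball a few samples. The snippet below is a hypothetical preview helper (not one of the project files); it uses matplotlib, which is already in the dependency list:
```python
# preview_samples.py - visually sanity-check a few generated CAPTCHAs
import matplotlib.pyplot as plt
from dataset_generator import CaptchaGenerator

generator = CaptchaGenerator()
fig, axes = plt.subplots(2, 3, figsize=(12, 4))
for ax in axes.flat:
    image, text = generator.generate()
    ax.imshow(image)
    ax.set_title(text)
    ax.axis('off')
plt.tight_layout()
plt.show()
```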
  3. Data Loading and Preprocessing
    Create data_module.py:

```python
import os
import string

import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader
import albumentations as A
from albumentations.pytorch import ToTensorV2
from PIL import Image
import pytorch_lightning as pl

# Character set must match the generator (digits + uppercase letters)
CHAR_SET = string.digits + string.ascii_uppercase

class CaptchaDataset(Dataset):
    def __init__(self, data_dir, transform=None):
        self.data_dir = data_dir
        self.image_files = [f for f in os.listdir(data_dir) if f.endswith('.png')]
        self.transform = transform
        self.char_to_idx = {c: i for i, c in enumerate(CHAR_SET)}
        self.idx_to_char = {i: c for i, c in enumerate(CHAR_SET)}

    def __len__(self):
        return len(self.image_files)

    def __getitem__(self, idx):
        img_path = os.path.join(self.data_dir, self.image_files[idx])
        image = np.array(Image.open(img_path).convert('RGB'))
        # The label is encoded in the file name: <text>_<index>.png
        label_str = self.image_files[idx].split('_')[0]
        label = [self.char_to_idx[c] for c in label_str]

        if self.transform:
            augmented = self.transform(image=image)
            image = augmented['image']

        return {
            'image': image,
            'label': torch.tensor(label),
            'label_str': label_str
        }

# Subclass LightningDataModule so it can be passed to trainer.fit(..., datamodule=...)
class CaptchaDataModule(pl.LightningDataModule):
    def __init__(self, batch_size=64):
        super().__init__()
        self.batch_size = batch_size

        # Training-set augmentations
        self.train_transform = A.Compose([
            A.Rotate(limit=10, p=0.5),
            A.Perspective(scale=(0.05, 0.1)),
            A.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
            A.GaussianBlur(blur_limit=(3, 7)),
            A.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
            ToTensorV2()
        ])

        # Validation-set transform (no augmentation)
        self.val_transform = A.Compose([
            A.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
            ToTensorV2()
        ])

    def setup(self, stage=None):
        # Create the datasets
        self.train_dataset = CaptchaDataset(
            os.path.join("dataset", "train"),
            self.train_transform
        )
        self.val_dataset = CaptchaDataset(
            os.path.join("dataset", "val"),
            self.val_transform
        )

    def train_dataloader(self):
        return DataLoader(
            self.train_dataset,
            batch_size=self.batch_size,
            shuffle=True,
            num_workers=4,
            pin_memory=True
        )

    def val_dataloader(self):
        return DataLoader(
            self.val_dataset,
            batch_size=self.batch_size,
            shuffle=False,
            num_workers=4,
            pin_memory=True
        )
```
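    As a quick check (a minimal sketch, assuming the dataset has already been generated under dataset/), pull one batch and verify the tensor shapes before moving on to the model:
```python
# Hypothetical smoke test for the data pipeline (not part of the project files)
from data_module import CaptchaDataModule

dm = CaptchaDataModule(batch_size=8)
dm.setup()
batch = next(iter(dm.train_dataloader()))
print(batch['image'].shape)    # expected: torch.Size([8, 3, 80, 200])
print(batch['label'].shape)    # expected: torch.Size([8, 6])
print(batch['label_str'][:3])  # a few raw label strings
```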
  4. Model Design and Implementation
    Create captcha_model.py:

```python
import torch
import torch.nn as nn
import torch.nn.functional as F
from efficientnet_pytorch import EfficientNet
import pytorch_lightning as pl

from data_module import CHAR_SET

# Index-to-character lookup shared with the data module
IDX_TO_CHAR = {i: c for i, c in enumerate(CHAR_SET)}

class CaptchaModel(pl.LightningModule):
    def __init__(self, num_chars):
        super().__init__()
        self.save_hyperparameters()

        # EfficientNet-B3 backbone as the feature extractor (1536 output channels)
        self.backbone = EfficientNet.from_pretrained('efficientnet-b3')
        self.backbone._fc = nn.Identity()

        # Squeeze-and-excitation style channel attention
        self.attention = nn.Sequential(
            nn.AdaptiveAvgPool2d(1),
            nn.Flatten(),
            nn.Linear(1536, 1536 // 8),
            nn.ReLU(),
            nn.Linear(1536 // 8, 1536),
            nn.Sigmoid()
        )

        # Bidirectional GRU over the width (time) dimension
        self.gru = nn.GRU(
            input_size=1536,
            hidden_size=512,
            num_layers=3,
            bidirectional=True,
            dropout=0.3
        )

        # Output layer: num_chars classes plus one extra class for the CTC blank
        self.fc = nn.Linear(1024, num_chars + 1)

        # CTC loss; the blank index is num_chars (the last class)
        self.criterion = nn.CTCLoss(blank=num_chars)

    def forward(self, x):
        # Extract convolutional features: [B, 1536, H, W]
        features = self.backbone.extract_features(x)

        # Apply channel attention
        attn_weights = self.attention(features).view(-1, 1536, 1, 1)
        features = features * attn_weights

        # Collapse the height dimension so each width position becomes one time step
        # and the per-step feature size matches the GRU input_size (1536)
        features = F.adaptive_avg_pool2d(features, (1, features.size(3)))  # [B, 1536, 1, W]

        # Rearrange into a sequence for the RNN
        B, C, H, W = features.size()
        features = features.permute(0, 3, 1, 2)    # [B, W, C, H]
        features = features.reshape(B, W, C * H)   # [B, W, 1536]
        features = features.permute(1, 0, 2)       # [W, B, 1536]
        rnn_out, _ = self.gru(features)

        # Per-time-step class scores
        output = self.fc(rnn_out)  # [W, B, num_chars + 1]
        return output

    def training_step(self, batch, batch_idx):
        images = batch['image']
        labels = batch['label']

        outputs = self(images)

        # CTC inputs: every sample uses the full sequence length W,
        # every target has the fixed CAPTCHA length
        input_lengths = torch.full(
            size=(outputs.size(1),),
            fill_value=outputs.size(0),
            dtype=torch.long,
            device=self.device
        )
        target_lengths = torch.full(
            size=(labels.size(0),),
            fill_value=labels.size(1),
            dtype=torch.long,
            device=self.device
        )

        loss = self.criterion(
            outputs.log_softmax(2),
            labels,
            input_lengths,
            target_lengths
        )

        acc = self._compute_accuracy(outputs, labels)

        self.log('train_loss', loss, prog_bar=True)
        self.log('train_acc', acc, prog_bar=True)
        return loss

    def validation_step(self, batch, batch_idx):
        images = batch['image']
        labels = batch['label']

        outputs = self(images)

        input_lengths = torch.full(
            size=(outputs.size(1),),
            fill_value=outputs.size(0),
            dtype=torch.long,
            device=self.device
        )
        target_lengths = torch.full(
            size=(labels.size(0),),
            fill_value=labels.size(1),
            dtype=torch.long,
            device=self.device
        )

        loss = self.criterion(
            outputs.log_softmax(2),
            labels,
            input_lengths,
            target_lengths
        )

        acc = self._compute_accuracy(outputs, labels)

        self.log('val_loss', loss, prog_bar=True)
        self.log('val_acc', acc, prog_bar=True)
        return loss

    def _compute_accuracy(self, outputs, labels):
        # Greedy CTC decoding: argmax per time step, collapse repeats, drop blanks
        _, preds = torch.max(outputs, 2)  # [W, B]
        preds = preds.permute(1, 0)       # [B, W]

        blank_idx = len(CHAR_SET)
        correct = 0
        for i in range(labels.size(0)):
            decoded = []
            prev = None
            for p in preds[i].tolist():
                if p != prev and p != blank_idx:
                    decoded.append(IDX_TO_CHAR[p])
                prev = p
            pred_str = ''.join(decoded)
            true_str = ''.join(IDX_TO_CHAR[t.item()] for t in labels[i])
            if pred_str == true_str:
                correct += 1

        return correct / labels.size(0)

    def configure_optimizers(self):
        optimizer = torch.optim.AdamW(
            self.parameters(),
            lr=3e-4,
            weight_decay=1e-5
        )

        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
            optimizer,
            mode='max',
            factor=0.5,
            patience=3,
            verbose=True
        )

        return {
            'optimizer': optimizer,
            'lr_scheduler': {
                'scheduler': scheduler,
                'monitor': 'val_acc'
            }
        }
```
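    A quick dry run (a hypothetical check, assuming a 200x80 input as produced by the generator) verifies the output shape and, importantly, that the number of time steps W is at least the 6-character label length required by CTC:
```python
# Hypothetical shape check (not part of the project files)
import torch
from data_module import CHAR_SET
from captcha_model import CaptchaModel

model = CaptchaModel(num_chars=len(CHAR_SET))
model.eval()
with torch.no_grad():
    dummy = torch.randn(2, 3, 80, 200)  # [B, C, H, W] matching the generator
    out = model(dummy)
print(out.shape)  # expected: [W, 2, len(CHAR_SET) + 1]; W must be >= 6 for CTC to converge
```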
  5. Model Training and Evaluation
    Create train.py:

```python
import torch
import pytorch_lightning as pl
from pytorch_lightning.callbacks import ModelCheckpoint, EarlyStopping
from captcha_model import CaptchaModel
from data_module import CaptchaDataModule, CHAR_SET

def train():
    # Data module
    data_module = CaptchaDataModule(batch_size=64)
    data_module.setup()

    # Model
    model = CaptchaModel(num_chars=len(CHAR_SET))

    # Training callbacks
    checkpoint_callback = ModelCheckpoint(
        monitor='val_acc',
        mode='max',
        save_top_k=3,
        filename='captcha-{epoch:02d}-{val_acc:.2f}'
    )

    early_stop_callback = EarlyStopping(
        monitor='val_acc',
        patience=5,
        mode='max'
    )

    # Trainer
    trainer = pl.Trainer(
        max_epochs=30,
        callbacks=[checkpoint_callback, early_stop_callback],
        accelerator='gpu' if torch.cuda.is_available() else 'cpu',
        devices=1,
        log_every_n_steps=10,
        precision=16
    )

    # Start training
    trainer.fit(model, datamodule=data_module)

if __name__ == "__main__":
    train()
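    After training, the best checkpoint can be reloaded for a final validation pass. The sketch below is hypothetical; substitute the checkpoint path that ModelCheckpoint actually saved for your run (here reusing the best_model.ckpt name from section 7.1):
```python
# Hypothetical evaluation sketch (not part of the project files)
import pytorch_lightning as pl
from captcha_model import CaptchaModel
from data_module import CaptchaDataModule

model = CaptchaModel.load_from_checkpoint("best_model.ckpt")
data_module = CaptchaDataModule(batch_size=64)

trainer = pl.Trainer(accelerator='auto', devices=1)
trainer.validate(model, datamodule=data_module)  # reports val_loss and val_acc on the validation split
```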
  6. Model Deployment and API Service
    Create app.py:

```python
import io
import string

import numpy as np
import torch
from flask import Flask, request, jsonify
from PIL import Image
import albumentations as A
from albumentations.pytorch import ToTensorV2
import onnxruntime as ort

app = Flask(__name__)

# Character set must match training; the extra class beyond it is the CTC blank
CHAR_SET = string.digits + string.ascii_uppercase
IDX_TO_CHAR = {i: c for i, c in enumerate(CHAR_SET)}

# Load the exported ONNX model (see section 7.1)
ort_session = ort.InferenceSession("captcha_model.onnx")

# Same preprocessing as the validation transform used in training
transform = A.Compose([
    A.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
    ToTensorV2()
])

def preprocess_image(image_bytes):
    image = Image.open(io.BytesIO(image_bytes)).convert('RGB')
    image = np.array(image)
    return transform(image=image)['image'].unsqueeze(0).numpy()

@app.route('/predict', methods=['POST'])
def predict():
    if 'file' not in request.files:
        return jsonify({'error': 'No file provided'}), 400

    file = request.files['file']
    if file.filename == '':
        return jsonify({'error': 'Empty filename'}), 400

    try:
        # Preprocess the uploaded image
        image_numpy = preprocess_image(file.read())

        # ONNX inference; output shape is [W, 1, num_chars + 1]
        ort_inputs = {ort_session.get_inputs()[0].name: image_numpy}
        ort_outs = ort_session.run(None, ort_inputs)
        outputs = torch.tensor(ort_outs[0])

        # Greedy CTC decode: argmax per time step, collapse repeats, drop blanks
        _, predicted = torch.max(outputs, 2)
        predicted = predicted[:, 0]  # remove the batch dimension
        blank_idx = len(CHAR_SET)
        decoded = []
        prev = None
        for p in predicted.tolist():
            if p != prev and p != blank_idx:
                decoded.append(IDX_TO_CHAR[p])
            prev = p
        captcha_text = ''.join(decoded)

        return jsonify({'prediction': captcha_text})

    except Exception as e:
        return jsonify({'error': str(e)}), 500

if __name__ == '__main__':
    app.run(host='0.0.0.0', port=5000)
```
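    To exercise the endpoint, a minimal client sketch (assuming the requests package is installed and a local CAPTCHA image named test.png exists) posts an image to /predict:
```python
# Hypothetical client for the /predict endpoint (not part of the project files)
import requests

with open("test.png", "rb") as f:
    resp = requests.post(
        "http://localhost:5000/predict",
        files={"file": ("test.png", f, "image/png")}
    )

print(resp.status_code)
print(resp.json())  # e.g. {"prediction": "A3F9KX"}
```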
  7. Performance Optimization and Production Deployment
    7.1 Exporting the Model to ONNX
```python
import torch
from captcha_model import CaptchaModel

def export_to_onnx():
    # Load the trained checkpoint
    model = CaptchaModel.load_from_checkpoint("best_model.ckpt")
    model.eval()

    # Dummy input matching the generator's 200x80 images ([B, C, H, W])
    dummy_input = torch.randn(1, 3, 80, 200)

    # Export the model; the batch dimension of the output sits at axis 1 ([W, B, num_chars + 1])
    torch.onnx.export(
        model,
        dummy_input,
        "captcha_model.onnx",
        input_names=["input"],
        output_names=["output"],
        dynamic_axes={
            "input": {0: "batch_size"},
            "output": {1: "batch_size"}
        },
        opset_version=13
    )

if __name__ == "__main__":
    export_to_onnx()
```
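    Before wiring the ONNX file into the API, it is worth confirming that the exported graph matches the PyTorch model numerically. A hypothetical parity check, assuming best_model.ckpt and captcha_model.onnx from the step above:
```python
# Hypothetical parity check between PyTorch and ONNX outputs
import numpy as np
import torch
import onnxruntime as ort
from captcha_model import CaptchaModel

model = CaptchaModel.load_from_checkpoint("best_model.ckpt")
model.eval()

dummy = torch.randn(1, 3, 80, 200)
with torch.no_grad():
    torch_out = model(dummy).numpy()

session = ort.InferenceSession("captcha_model.onnx")
onnx_out = session.run(None, {session.get_inputs()[0].name: dummy.numpy()})[0]

# Small numerical differences are expected; anything large points to an export problem
print("max abs diff:", np.abs(torch_out - onnx_out).max())
```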

    7.2 Containerized Deployment with Docker
    Create a Dockerfile:

```dockerfile
FROM python:3.8-slim

WORKDIR /app

COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

COPY . .

EXPOSE 5000

CMD ["gunicorn", "--bind", "0.0.0.0:5000", "--workers", "4", "app:app"]
```
    Build and run the container:

```bash
docker build -t captcha-service .
docker run -p 5000:5000 captcha-service
```
