用 PyTorch 打造抗干扰验证码识别器
一、项目背景
随着验证码在安全验证领域中的广泛应用,其识别系统在自动化登录、数据采集等场景扮演着重要角色。传统识别方法在面对带干扰线、扭曲和变形的验证码时识别率较低。为此,本文提出一种具有鲁棒性增强机制的 PyTorch 验证码识别模型,并在不同类型验证码上进行测试和优化。
二、系统总体设计
本系统分为四个部分:
验证码图像生成(带干扰)
图像增强与对抗扰动模拟
更多内容访问ttocr.com或联系1436423940
卷积神经网络模型设计
模型训练与评估
三、验证码图像生成与对抗扰动增强
我们使用 captcha 与 imgaug 组合生成具有强干扰的验证码图像,字符集中包含大写字母与数字,验证码长度为 4。
from captcha.image import ImageCaptcha
import random, string, os
from PIL import Image
import numpy as np
import imgaug.augmenters as iaa
# Alphabet the CAPTCHA text is drawn from: the 26 uppercase ASCII letters
# followed by the 10 decimal digits (36 symbols in total).
CHARSET = string.ascii_uppercase + string.digits
def generate_captcha(text, width=120, height=40):
    """Render ``text`` as a CAPTCHA image.

    Args:
        text: The characters to draw on the image.
        width: Image width in pixels. Defaults to 120.
        height: Image height in pixels. Defaults to 40.

    Returns:
        The image object returned by ``ImageCaptcha.generate_image``.
    """
    generator = ImageCaptcha(width=width, height=height)
    return generator.generate_image(text)
def add_noise(image):
    """Apply the noise/distortion augmentation pipeline to one image.

    The image is converted to a NumPy array and passed through, in order,
    additive Gaussian noise, sharpening and a piecewise affine warp.

    Args:
        image: Anything ``np.array`` can convert to an image array
            (the caller in this file passes a PIL image).

    Returns:
        The augmented image as returned by the imgaug pipeline.
    """
    pixels = np.array(image)
    augmenters = [
        iaa.AdditiveGaussianNoise(scale=(5, 20)),
        iaa.Sharpen(alpha=0.3),
        iaa.PiecewiseAffine(scale=(0.01, 0.03)),
    ]
    pipeline = iaa.Sequential(augmenters)
    return pipeline(image=pixels)
# Build the training set: 5000 noisy, grayscale 4-character CAPTCHAs.
# The label is encoded in the file name as "<text>_<index>.png"; the index
# keeps file names unique when the same random text is drawn more than once.
os.makedirs("data/train", exist_ok=True)
for i in range(5000):
    text = ''.join(random.choices(CHARSET, k=4))
    clean = generate_captcha(text)
    noisy = add_noise(clean)
    # "L" mode = single-channel 8-bit grayscale.
    gray = Image.fromarray(noisy).convert("L")
    gray.save(f"data/train/{text}_{i}.png")
四、模型结构设计
模型采用残差结构以增强深层特征传递能力,输出为每个字符位置的分类 logits(未经 softmax,训练时由交叉熵损失在内部完成归一化)。
import torch.nn as nn
class ResidualBlock(nn.Module):
    """Residual block: two 3x3 conv + batch-norm layers plus a 1x1 shortcut.

    The main branch keeps the spatial size (``padding=1`` with a 3x3 kernel)
    and changes the channel count from ``in_c`` to ``out_c``. The shortcut
    is a 1x1 convolution so that its output has ``out_c`` channels and can
    be added to the main branch.

    Args:
        in_c: Number of input channels.
        out_c: Number of output channels.
    """

    # NOTE: the constructor must be named ``__init__`` (it was ``init``);
    # otherwise ``nn.Module.__init__`` runs instead, rejects the positional
    # arguments, and no sub-modules are ever registered.
    def __init__(self, in_c, out_c):
        super().__init__()
        self.block = nn.Sequential(
            nn.Conv2d(in_c, out_c, 3, padding=1),
            nn.BatchNorm2d(out_c),
            nn.ReLU(),
            nn.Conv2d(out_c, out_c, 3, padding=1),
            nn.BatchNorm2d(out_c),
        )
        self.skip = nn.Conv2d(in_c, out_c, 1)
        # Created once here instead of on every forward pass; ReLU has no
        # parameters, so the state_dict keys are unchanged.
        self.relu = nn.ReLU()

    def forward(self, x):
        """Return ``relu(block(x) + skip(x))``."""
        return self.relu(self.block(x) + self.skip(x))
class RobustCaptchaModel(nn.Module):
    """CNN with residual blocks and one linear classification head per character.

    Args:
        num_chars: Number of character positions (one head each).
        num_classes: Number of classes each head predicts.
        height: Input image height in pixels. Defaults to 40.
        width: Input image width in pixels. Defaults to 120.

    The input is expected to have one channel (the first convolution takes
    ``in_channels=1``).
    """

    # NOTE: the constructor must be named ``__init__`` (it was ``init``);
    # otherwise no layers are ever created.
    def __init__(self, num_chars=4, num_classes=36, height=40, width=120):
        super().__init__()
        self.conv = nn.Sequential(
            nn.Conv2d(1, 32, 3, padding=1), nn.ReLU(), nn.MaxPool2d(2),
            ResidualBlock(32, 64), nn.MaxPool2d(2),
            ResidualBlock(64, 128), nn.MaxPool2d(2),
        )
        # Three MaxPool2d(2) layers shrink each spatial dimension by a
        # factor of 8 (floor division); for the default 40x120 input this
        # is 128 * 5 * 15 = 9600 features.
        flat_features = 128 * (height // 8) * (width // 8)
        self.fc = nn.Sequential(
            nn.Flatten(),
            nn.Linear(flat_features, 512), nn.ReLU(),
        )
        self.heads = nn.ModuleList(
            [nn.Linear(512, num_classes) for _ in range(num_chars)]
        )

    def forward(self, x):
        """Return a list of ``num_chars`` logit tensors, one per position."""
        x = self.conv(x)
        x = self.fc(x)
        return [head(x) for head in self.heads]
五、训练策略与优化
为了提高模型鲁棒性,我们使用以下技巧:
Label Smoothing:避免模型过拟合特定字符
MixUp 图像混合增强
Early Stopping 与 Learning Rate Scheduler
训练流程示意(下面的示例代码仅演示 Label Smoothing 与 StepLR 学习率调度,MixUp 与 Early Stopping 未包含在内):
import torch
import torch.nn.functional as F
from torch.utils.data import DataLoader
from dataset import CaptchaDataset
# Train on the GPU when one is available, otherwise fall back to the CPU
# instead of crashing on a hard-coded ``.cuda()`` call.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = RobustCaptchaModel().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
# Halve the learning rate every 10 epochs.
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.5)
loader = DataLoader(CaptchaDataset("data/train"), batch_size=64, shuffle=True)

for epoch in range(30):
    model.train()
    total_loss = 0
    for img, label in loader:
        img, label = img.to(device), label.to(device)
        preds = model(img)
        # One cross-entropy term per character head, summed into one loss.
        # assumes label is an integer tensor of shape (batch, num_chars)
        # -- TODO confirm against CaptchaDataset.
        loss = sum(
            F.cross_entropy(p, label[:, i], label_smoothing=0.1)
            for i, p in enumerate(preds)
        )
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    # Step the scheduler once per epoch, after that epoch's optimizer steps.
    scheduler.step()
    print(f"Epoch {epoch+1}, Loss: {total_loss:.4f}")