基于 PyTorch 的深度学习验证码识别实践
验证码(CAPTCHA)是防止自动化脚本滥用系统的一种安全措施。然而,随着深度学习技术的发展,利用卷积神经网络(CNN)对验证码进行自动识别已成为可能。本文介绍了基于 PyTorch 的验证码识别系统实现方法,包括数据生成、模型设计、训练与测试流程,并讨论了可能的优化方向。
引言
传统验证码识别依赖于图像处理和特征工程,通常需要对噪声去除、字符分割等步骤进行精细调整。这种方法对验证码样式的变化非常敏感,泛化能力不足。
深度学习方法则可以端到端地学习从原始图像到字符标签的映射,省去了复杂的手动特征提取过程,并且在多样化验证码场景中表现更为稳定。
数据生成
为了避免手动收集大量验证码图片,本系统使用 Python 的 captcha 库动态生成训练数据。
示例代码:
from captcha.image import ImageCaptcha
import random, string, os
更多内容访问ttocr.com或联系1436423940
def generate_dataset(output_dir, num_images=5000, length=4):
    """Render random CAPTCHA images into ``output_dir``.

    Every image is saved as ``<text>_<index>.png`` so the ground-truth label
    can be read back from the file name; the running index keeps file names
    unique when the same text is drawn more than once.

    Args:
        output_dir: Directory to write into; created if it does not exist.
        num_images: Number of images to generate.
        length: Number of characters in each CAPTCHA text.
    """
    os.makedirs(output_dir, exist_ok=True)
    image = ImageCaptcha(width=160, height=60)
    # Alphabet: 26 uppercase letters followed by 10 digits.
    characters = string.ascii_uppercase + string.digits
    for i in range(num_images):
        text = ''.join(random.choices(characters, k=length))
        image.write(text, os.path.join(output_dir, f"{text}_{i}.png"))
# Build the training split and the held-out test split.
generate_dataset(output_dir='train', num_images=8000)
generate_dataset(output_dir='test', num_images=2000)
图片尺寸:160×60
字符集:大写字母 + 数字
标签直接存储在文件名中,方便后续解析。
数据预处理
训练前需要对图像进行灰度化、归一化处理,并将标签转换为字符类别索引张量(交叉熵损失直接使用类别索引,无需独热编码)。
from PIL import Image
import torch
from torch.utils.data import Dataset
import glob
class CaptchaDataset(Dataset):
    """Dataset of CAPTCHA images whose labels are stored in the file names.

    Files are expected to be named ``<label>_<suffix>.png``; the text before
    the first underscore is used as the label.
    """

    def __init__(self, folder, transform=None):
        """Collect the ``.png`` files found directly inside ``folder``.

        Args:
            folder: Directory that holds the images.
            transform: Optional callable applied to each grayscale image.
        """
        self.files = glob.glob(f"{folder}/*.png")
        self.transform = transform
        # Class index of a character is its position in this alphabet.
        self.chars = string.ascii_uppercase + string.digits
        self.char2idx = {c: i for i, c in enumerate(self.chars)}

    def __len__(self):
        """Return the number of image files found."""
        return len(self.files)

    @staticmethod
    def _label_from_path(filepath):
        """Return the label text encoded in the file name of ``filepath``."""
        # basename() honours the platform's path separator, whereas splitting
        # on a literal '/' leaves the directory attached to the label when the
        # path is written with a different separator.
        return os.path.basename(filepath).split('_')[0]

    def __getitem__(self, idx):
        """Return ``(image, label)`` for the file at position ``idx``.

        ``label`` is a 1-D ``torch.long`` tensor holding one class index per
        character of the label text.

        Raises:
            KeyError: If the label contains a character outside ``self.chars``.
        """
        filepath = self.files[idx]
        label_str = self._label_from_path(filepath)
        img = Image.open(filepath).convert('L')  # grayscale
        if self.transform:
            img = self.transform(img)
        label = torch.tensor([self.char2idx[c] for c in label_str], dtype=torch.long)
        return img, label
模型设计
本系统采用三层卷积的 CNN + 全连接层的架构,每个字符位置独立预测:
import torch.nn as nn
class CaptchaCNN(nn.Module):
    """CNN that classifies every character position of a fixed-length CAPTCHA.

    Three conv/ReLU/max-pool stages feed a single linear layer whose output
    is reshaped to one score vector per character position.
    """

    def __init__(self, num_chars=36, captcha_length=4, image_height=60, image_width=160):
        """Build the network.

        Args:
            num_chars: Number of character classes per position.
            captcha_length: Number of character positions to predict.
            image_height: Height of the input images in pixels.
            image_width: Width of the input images in pixels.
        """
        super().__init__()
        self.conv = nn.Sequential(
            nn.Conv2d(1, 32, 3, padding=1), nn.ReLU(), nn.MaxPool2d(2),
            nn.Conv2d(32, 64, 3, padding=1), nn.ReLU(), nn.MaxPool2d(2),
            nn.Conv2d(64, 128, 3, padding=1), nn.ReLU(), nn.MaxPool2d(2)
        )
        # The padded 3x3 convolutions keep the spatial size; each of the three
        # MaxPool2d(2) stages halves it (rounding down), i.e. size // 8 overall.
        # With the default 60x160 input this is 7 x 20, giving 128 * 20 * 7.
        feat_h = image_height // 8
        feat_w = image_width // 8
        self.fc = nn.Linear(128 * feat_w * feat_h, captcha_length * num_chars)
        self.num_chars = num_chars
        self.length = captcha_length

    def forward(self, x):
        """Return scores shaped ``(batch, captcha_length, num_chars)``.

        Args:
            x: Image batch with a single channel, ``(batch, 1, height, width)``.
        """
        x = self.conv(x)
        x = x.flatten(1)  # keep the batch dimension, merge the rest
        x = self.fc(x)
        return x.view(-1, self.length, self.num_chars)
模型训练
import torch.optim as optim
import torch.nn.functional as F
from torchvision import transforms
from torch.utils.data import DataLoader
# Preprocessing: resize to (height, width) = (60, 160) and convert to a tensor.
transform = transforms.Compose([
    transforms.Resize((60, 160)),
    transforms.ToTensor(),
])

train_data = CaptchaDataset('train', transform=transform)
test_data = CaptchaDataset('test', transform=transform)
# Only the training set is shuffled.
train_loader = DataLoader(train_data, batch_size=64, shuffle=True)
test_loader = DataLoader(test_data, batch_size=64)

model = CaptchaCNN()
optimizer = optim.Adam(model.parameters(), lr=0.001)

for epoch in range(20):
    model.train()
    total_loss = 0
    for imgs, labels in train_loader:
        optimizer.zero_grad()
        output = model(imgs)
        # Total loss is the sum of the per-position cross-entropy losses.
        loss = sum(F.cross_entropy(output[:, i, :], labels[:, i]) for i in range(model.length))
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    # Report the mean batch loss for this epoch.
    print(f"Epoch {epoch+1}, Loss: {total_loss/len(train_loader):.4f}")
模型测试与评估
def accuracy(model, loader):
    """Return the fraction of samples whose characters are all predicted correctly.

    A sample counts as correct only when the prediction matches the label at
    every character position.

    Args:
        model: Module returning scores shaped ``(batch, length, num_chars)``.
        loader: Iterable yielding ``(imgs, labels)`` batches, where ``labels``
            is shaped ``(batch, length)``.

    Returns:
        Accuracy in ``[0, 1]``; ``0.0`` when ``loader`` yields no samples.
    """
    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for imgs, labels in loader:
            output = model(imgs)
            preds = output.argmax(2)  # best class per character position
            correct += (preds == labels).all(1).sum().item()
            total += labels.size(0)
    # Guard against an empty loader instead of dividing by zero.
    if total == 0:
        return 0.0
    return correct / total
# Whole-CAPTCHA accuracy of the model on the test loader.
acc = accuracy(model, test_loader)
# Printed as a percentage with two decimals.
print(f"验证码整体识别准确率: {acc*100:.2f}%")
浙公网安备 33010602011771号