基于深度学习的验证码自动识别系统实现与优化

引言
验证码(CAPTCHA, Completely Automated Public Turing test to tell Computers and Humans Apart)广泛应用于网站注册、登录及重要操作中,用以防止自动化程序批量请求。然而,随着深度学习与计算机视觉技术的飞速发展,基于卷积神经网络(CNN)的验证码识别精度已经接近人类水平。本文将以 PyTorch 框架为基础,完整介绍一个从数据生成到模型优化的验证码识别系统实现过程。

技术背景
验证码特征

字符集:数字(0-9)、字母(a-z/A-Z)或混合

干扰方式:曲线、噪点、字符扭曲
更多内容访问ttocr.com或联系1436423940
长度:固定或可变(常见 4~6 位)

深度学习优势

能自动学习特征,减少人工特征工程

CNN 在图像分类与序列特征提取方面效果突出

可结合 CTC(Connectionist Temporal Classification)处理变长验证码

系统实现步骤
3.1 数据集生成
验证码识别模型对数据量要求较高,当人工采集成本过大时,可利用 captcha 库自动生成训练数据。
from captcha.image import ImageCaptcha
import random, string, os

# Configuration: label alphabet (digits + lowercase letters) and output folder.
chars = string.digits + string.ascii_lowercase
dataset_dir = "dataset"
# Create the output folder up front; exist_ok keeps reruns from failing.
os.makedirs(dataset_dir, exist_ok=True)

def random_text(length=4, charset=None):
    """Return a random string of ``length`` characters.

    Args:
        length: Number of characters to draw.
        charset: Sequence of characters to draw from. When omitted, the
            module-level ``chars`` alphabet is used.

    Returns:
        The generated string (empty when ``length`` is 0).
    """
    alphabet = chars if charset is None else charset
    return ''.join(random.choice(alphabet) for _ in range(length))

def generate_dataset(n=5000, out_dir=None):
    """Render ``n`` random captcha images and save them as PNG files.

    Each file is named ``{text}_{i}.png`` so that the label can later be
    recovered from the file name.

    Args:
        n: Number of images to generate.
        out_dir: Destination folder. When omitted, the module-level
            ``dataset_dir`` is used. The folder is created if missing.
    """
    target = dataset_dir if out_dir is None else out_dir
    os.makedirs(target, exist_ok=True)
    gen = ImageCaptcha(width=160, height=60)
    for i in range(n):
        text = random_text()
        img = gen.generate_image(text)
        # The index suffix keeps file names unique when a text repeats.
        img.save(os.path.join(target, f"{text}_{i}.png"))

# Build the dataset with the default size (n=5000) when the script runs.
generate_dataset()
生成的文件命名方式为 {标签}_{编号}.png,方便后续解析。

3.2 数据加载与预处理

import torch
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import glob

class CaptchaDataset(Dataset):
    """Dataset of captcha images whose labels are encoded in the file name.

    Every ``*.png`` file directly inside ``path`` is treated as one sample.
    The label is the part of the file name before the first underscore
    (``{label}_{index}.png``).

    Args:
        path: Folder containing the PNG files.
        transform: Optional callable applied to the grayscale PIL image.
        charset: Alphabet used to map characters to class indices. When
            omitted, the module-level ``chars`` alphabet is used.
    """

    def __init__(self, path, transform=None, charset=None):
        # glob order is arbitrary; sort so sample indices are reproducible.
        self.files = sorted(glob.glob(os.path.join(path, "*.png")))
        self.transform = transform
        self.chars = chars if charset is None else charset
        self.char_to_idx = {c: i for i, c in enumerate(self.chars)}

    def __len__(self):
        """Return the number of image files found."""
        return len(self.files)

    def _encode_label(self, file):
        """Turn the label embedded in ``file``'s name into a LongTensor.

        Raises:
            KeyError: If the label contains a character outside the alphabet.
        """
        # basename handles both "/" and the platform's native separator.
        label_str = os.path.basename(file).split("_")[0]
        return torch.tensor(
            [self.char_to_idx[c] for c in label_str], dtype=torch.long
        )

    def __getitem__(self, idx):
        """Return ``(image, label)`` for the sample at position ``idx``."""
        file = self.files[idx]
        label = self._encode_label(file)
        # Close the file handle promptly; convert() returns a loaded copy.
        with Image.open(file) as raw:
            img = raw.convert("L")
        if self.transform:
            img = self.transform(img)
        return img, label
数据预处理与归一化
from torchvision import transforms
# Preprocessing pipeline: resize every image to (60, 160) and convert it
# to a tensor.
transform = transforms.Compose(
    [
        transforms.Resize((60, 160)),
        transforms.ToTensor(),
    ]
)

# Wrap the generated images and serve them in shuffled batches of 64.
dataset = CaptchaDataset("dataset", transform=transform)
loader = DataLoader(dataset, batch_size=64, shuffle=True)
3.3 模型设计
采用卷积层提取特征 + 全连接层进行多字符预测。

import torch.nn as nn

class CaptchaCNN(nn.Module):
    """CNN that predicts every character of a fixed-length captcha at once.

    Three conv/ReLU/max-pool stages feed a two-layer classifier whose output
    is reshaped to one score vector per character position.

    Args:
        num_classes: Size of the character alphabet. When omitted, the length
            of the module-level ``chars`` alphabet is used.
        captcha_length: Number of characters in each captcha.
    """

    def __init__(self, num_classes=None, captcha_length=4):
        super().__init__()
        if num_classes is None:
            num_classes = len(chars)
        self.num_classes = num_classes
        self.captcha_length = captcha_length
        self.conv = nn.Sequential(
            nn.Conv2d(1, 32, 3, padding=1), nn.ReLU(), nn.MaxPool2d(2),
            nn.Conv2d(32, 64, 3, padding=1), nn.ReLU(), nn.MaxPool2d(2),
            nn.Conv2d(64, 128, 3, padding=1), nn.ReLU(), nn.MaxPool2d(2),
        )
        self.fc = nn.Sequential(
            # 128 * 7 * 20 matches a 1x60x160 input after three 2x poolings
            # (60 -> 30 -> 15 -> 7, 160 -> 80 -> 40 -> 20).
            nn.Linear(128 * 7 * 20, 1024),
            nn.ReLU(),
            nn.Dropout(0.25),
            nn.Linear(1024, captcha_length * num_classes),
        )

    def forward(self, x):
        """Return scores shaped ``(batch, captcha_length, num_classes)``."""
        x = self.conv(x)
        x = x.view(x.size(0), -1)
        x = self.fc(x)
        # Reshape with the configured class count, not the global alphabet,
        # so a custom num_classes yields the right shape.
        return x.view(-1, self.captcha_length, self.num_classes)
3.4 模型训练

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = CaptchaCNN().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

for epoch in range(20):
    model.train()
    total_loss = 0
    for imgs, labels in loader:
        imgs, labels = imgs.to(device), labels.to(device)
        optimizer.zero_grad()
        output = model(imgs)
        # One cross-entropy term per character position, summed. The number
        # of positions is read from the labels instead of a hard-coded 4.
        loss = sum(
            criterion(output[:, i, :], labels[:, i])
            for i in range(labels.size(1))
        )
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    # Report the mean batch loss once per epoch.
    print(f"Epoch {epoch+1}, Loss: {total_loss / len(loader):.4f}")
3.5 测试与预测

def decode(pred, charset=None):
    """Convert per-position class scores into predicted strings.

    Args:
        pred: Tensor indexed as ``[sample, position, class]``; the arg-max
            over the last axis selects the character for each position.
        charset: Alphabet used to map class indices to characters. When
            omitted, the module-level ``chars`` alphabet is used.

    Returns:
        A list with one decoded string per sample.
    """
    alphabet = chars if charset is None else charset
    # tolist() yields plain Python ints, which index the alphabet directly.
    rows = pred.argmax(2).tolist()
    return [''.join(alphabet[i] for i in row) for row in rows]

# Switch to evaluation mode and take a single batch from `loader`
# (the same loader that feeds the training loop).
model.eval()
test_imgs, test_labels = next(iter(loader))

# No gradients are needed for prediction.
with torch.no_grad():
    preds = model(test_imgs.to(device))

pred_texts = decode(preds.cpu())
real_texts = [
    ''.join(chars[i] for i in label)
    for label in test_labels
]

# Print the first five predictions next to their ground-truth labels.
for p, r in zip(pred_texts[:5], real_texts[:5]):
    print(f"Pred: {p}, Real: {r}")

posted @ 2025-08-11 23:50  ttocr、com  阅读(16)  评论(0)    收藏  举报