Intelligent CAPTCHA Recognition with PyTorch and an Attention Mechanism
To improve the robustness of CAPTCHA recognition under heavy visual interference, this article presents a recognition method that combines a convolutional neural network (CNN), a bidirectional long short-term memory network (BiLSTM), and an attention mechanism. The CNN first extracts spatial features, the BiLSTM then models the sequential information, and finally an attention module assigns weights to the key character positions, yielding high-accuracy recognition.
1. Introduction
CAPTCHA recognition is difficult because of character adhesion, rotation, background noise, and other interference, to which traditional OCR adapts poorly. The CNN + BiLSTM + Attention approach used here extracts spatial features while preserving sequential context, and the attention weights let the model focus on the important character regions.
2. Overall Pipeline
Dataset generation (random-character CAPTCHAs)
Data preprocessing (grayscale conversion, normalization; see the transform sketch after this list)
Model design (CNN → BiLSTM → Attention → fully connected output)
Model training (cross-entropy loss)
Testing and accuracy evaluation
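The grayscale and normalization step can be written as a torchvision transform that is applied to every image when the dataset is loaded. A minimal sketch, assuming torchvision is available; the 0.5 mean/std values are placeholder constants, not values from the article:

from torchvision import transforms

# Hypothetical preprocessing pipeline: grayscale -> tensor -> normalize.
preprocess = transforms.Compose([
    transforms.Grayscale(num_output_channels=1),  # single channel, matching Conv2d(1, 32, ...)
    transforms.ToTensor(),                        # uint8 [0, 255] -> float [0.0, 1.0]
    transforms.Normalize(mean=[0.5], std=[0.5]),  # roughly center the inputs
])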
3. Data Generation
from captcha.image import ImageCaptcha
import random, string, os
from PIL import Image
import numpy as np
CHAR_SET = string.digits + string.ascii_uppercase  # 36 possible characters: 0-9, A-Z
CAPTCHA_LEN = 4
IMG_W, IMG_H = 160, 60

def gen_text():
    # Draw CAPTCHA_LEN random characters from the character set.
    return ''.join(random.choices(CHAR_SET, k=CAPTCHA_LEN))

def gen_dataset(path, num=5000):
    os.makedirs(path, exist_ok=True)
    gen = ImageCaptcha(width=IMG_W, height=IMG_H)
    for _ in range(num):
        text = gen_text()
        # The ground-truth label is stored in the file name, e.g. "A3K9.png".
        gen.write(text, os.path.join(path, f"{text}.png"))
gen_dataset("data/train", 5000)
gen_dataset("data/test", 1000)
4. Model Construction (PyTorch)
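Before the code, the attention step implemented below can be stated explicitly: for each BiLSTM output h_t, the linear layer self.attn produces a scalar score s_t = w·h_t + b; a softmax over the time axis turns the scores into weights α_t = exp(s_t) / Σ_k exp(s_k); and the context vector fed to the classifier is the weighted sum c = Σ_t α_t · h_t.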
import torch
import torch.nn as nn
import torch.nn.functional as F
class CNN_BiLSTM_Attention(nn.Module):
    def __init__(self, num_classes):
        super().__init__()
        # Two conv blocks, each halving the spatial resolution:
        # input [B, 1, 60, 160] -> [B, 32, 30, 80] -> [B, 64, 15, 40]
        self.cnn = nn.Sequential(
            nn.Conv2d(1, 32, 3, 1, 1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
            nn.Conv2d(32, 64, 3, 1, 1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2)
        )
        # Each of the 40 width positions becomes a time step with 64*15 features.
        self.bi_lstm = nn.LSTM(64 * 15, 128, bidirectional=True, batch_first=True)
        self.attn = nn.Linear(256, 1)   # scalar attention score per time step
        self.fc = nn.Linear(256, num_classes)

    def forward(self, x):
        x = self.cnn(x)                  # [B, C, H, W]
        b, c, h, w = x.size()
        # Treat the width axis as the sequence axis: [B, W, C*H]
        x = x.permute(0, 3, 1, 2).contiguous().view(b, w, c * h)
        lstm_out, _ = self.bi_lstm(x)    # [B, W, 256]
        # Softmax over the time axis gives one weight per image column.
        attn_weights = torch.softmax(self.attn(lstm_out), dim=1)
        context = (lstm_out * attn_weights).sum(dim=1)   # [B, 256]
        out = self.fc(context)           # [B, num_classes]
        return out
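A quick way to verify the tensor shapes is to push a random batch through the model. This is only a sketch; net and dummy are names introduced here, and the output size of 144 assumes the per-character head used in the training section below (CAPTCHA_LEN * len(CHAR_SET) output units):

# Quick shape sanity check with a dummy batch of 8 images [B, 1, 60, 160].
net = CNN_BiLSTM_Attention(CAPTCHA_LEN * len(CHAR_SET))
dummy = torch.randn(8, 1, IMG_H, IMG_W)
print(net(dummy).shape)   # torch.Size([8, 144]); 4 positions x 36 characters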
5. Training
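The loop below refers to train_loader and device, which are not defined in the original snippet. A minimal sketch of a Dataset and DataLoader that reads the generated PNGs, reusing the preprocess transform and encode_text helper sketched earlier; CaptchaDataset and the batch size of 64 are choices made here, not taken from the article:

from torch.utils.data import Dataset, DataLoader
from PIL import Image
import glob, os

class CaptchaDataset(Dataset):
    def __init__(self, root):
        self.paths = glob.glob(os.path.join(root, "*.png"))

    def __len__(self):
        return len(self.paths)

    def __getitem__(self, idx):
        path = self.paths[idx]
        img = preprocess(Image.open(path).convert("RGB"))          # [1, 60, 160]
        label = encode_text(os.path.basename(path)[:CAPTCHA_LEN])  # [4] char indices
        return img, label

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
train_loader = DataLoader(CaptchaDataset("data/train"), batch_size=64, shuffle=True)
test_loader = DataLoader(CaptchaDataset("data/test"), batch_size=64)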
# One output unit per (position, character) pair; treating every whole string as
# its own class would require 36**4 outputs, far too many to learn from a few
# thousand samples.
model = CNN_BiLSTM_Attention(CAPTCHA_LEN * len(CHAR_SET)).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

for epoch in range(30):
    for imgs, labels in train_loader:
        imgs, labels = imgs.to(device), labels.to(device)   # labels: [B, 4]
        optimizer.zero_grad()
        outputs = model(imgs)                                # [B, 4 * 36]
        # Score each character position independently: [B*4, 36] vs [B*4]
        loss = criterion(outputs.view(-1, len(CHAR_SET)), labels.view(-1))
        loss.backward()
        optimizer.step()
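Finally, the testing and accuracy-evaluation step from the pipeline can be sketched as follows, using the test_loader defined above and reporting both per-character and whole-string accuracy:

model.eval()
char_correct, str_correct, total = 0, 0, 0
with torch.no_grad():
    for imgs, labels in test_loader:
        imgs, labels = imgs.to(device), labels.to(device)
        # [B, 144] -> [B, 4, 36] -> predicted character index per position
        preds = model(imgs).view(-1, CAPTCHA_LEN, len(CHAR_SET)).argmax(dim=2)
        char_correct += (preds == labels).sum().item()
        str_correct += (preds == labels).all(dim=1).sum().item()
        total += labels.size(0)
print(f"char accuracy:   {char_correct / (total * CAPTCHA_LEN):.4f}")
print(f"string accuracy: {str_correct / total:.4f}")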