用 Keras 构建图像验证码识别系统(CNN + LSTM + CTC)
验证码识别是将图像中的字符序列转为文本的一项任务,适合用深度学习方法来建模。本文介绍如何使用 Keras 构建一个 CNN+LSTM+CTC 模型,实现变长验证码的识别。
1. 安装依赖
pip install tensorflow pillow numpy captcha
2. 生成验证码图像数据
我们使用 captcha 库自动生成图片,验证码长度设置为 3~5 个字符不等:
from captcha.image import ImageCaptcha
import numpy as np
import string, random, os
from PIL import Image
# Recognition vocabulary: digits 0-9 plus uppercase A-Z (36 characters).
CHAR_SET = string.digits + string.ascii_uppercase
# Generated captcha image size in pixels (width, height).
WIDTH, HEIGHT = 160, 60
def generate_data(path='images', num=10000):
    """Generate `num` captcha images into directory `path`.

    Each label is 3-5 random characters from CHAR_SET; the file is named
    "<label>_<index>.png" so the ground truth can later be recovered from
    the filename.
    """
    os.makedirs(path, exist_ok=True)
    captcha_gen = ImageCaptcha(WIDTH, HEIGHT)
    for idx in range(num):
        # Variable-length labels make the task a real sequence problem.
        length = random.randint(3, 5)
        label = ''.join(random.choices(CHAR_SET, k=length))
        image = captcha_gen.generate_image(label)
        image.save(os.path.join(path, f"{label}_{idx}.png"))
generate_data()
3. 创建 TensorFlow 数据集
import tensorflow as tf
# Characters map to 1-based indices; index 0 is reserved for the CTC blank.
char2idx = {ch: pos for pos, ch in enumerate(CHAR_SET, start=1)}
idx2char = {pos: ch for ch, pos in char2idx.items()}
blank_token = 0  # CTC blank / padding token
def parse_sample(file_path):
    """Load one captcha image and decode its label from the filename.

    The ground-truth text is the filename part before the first '_'
    (see generate_data). Returns (img, label): img is a float32 tensor
    of shape (HEIGHT, WIDTH, 1) scaled to [0, 1]; label is an int32
    vector of 1-based character indices (0 is reserved for the CTC blank).
    """
    parts = tf.strings.split(tf.strings.split(file_path, os.sep)[-1], '_')
    label_str = parts[0]
    chars = tf.strings.unicode_split(label_str, 'UTF-8')
    # BUG FIX: the original indexed the Python dict `char2idx` with symbolic
    # tensors, which fails when this function is traced inside Dataset.map.
    # Match each character against a constant vocabulary tensor instead;
    # +1 keeps index 0 free for the CTC blank, mirroring char2idx.
    vocab = tf.constant(list(CHAR_SET))
    matches = tf.cast(tf.equal(chars[:, None], vocab[None, :]), tf.int32)
    label = tf.cast(tf.argmax(matches, axis=1), tf.int32) + 1
    img = tf.io.read_file(file_path)
    img = tf.image.decode_png(img, channels=1)
    img = tf.image.resize(img, (HEIGHT, WIDTH))
    img = tf.cast(img, tf.float32) / 255.0
    return img, label
def prepare_dataset(img_dir):
files = tf.data.Dataset.list_files(img_dir + '/*.png')
ds = files.map(parse_sample)
return ds.padded_batch(32, padded_shapes=([HEIGHT, WIDTH, 1], [None]))4. 构建模型:CNN + BiLSTM + CTC
from tensorflow.keras import layers, models
def build_model():
inputs = layers.Input(shape=(HEIGHT, WIDTH, 1))
x = layers.Conv2D(32, 3, padding='same', activation='relu')(inputs)
x = layers.MaxPooling2D((2, 2))(x)
x = layers.Conv2D(64, 3, padding='same', activation='relu')(x)
x = layers.MaxPooling2D((2, 2))(x)
x = layers.Reshape((WIDTH // 4, (HEIGHT // 4) * 64))(x)
x = layers.Bidirectional(layers.LSTM(128, return_sequences=True))(x)
x = layers.Dense(len(CHAR_SET) + 1, activation='softmax')(x) # +1 for blank
return models.Model(inputs, x)5. 自定义训练步骤(CTC)
@tf.function
def ctc_loss_fn(y_true, y_pred, input_len, label_len):
    """Mean CTC loss for a batch (thin wrapper over Keras ctc_batch_cost)."""
    per_sample = tf.keras.backend.ctc_batch_cost(
        y_true, y_pred, input_len, label_len)
    return tf.reduce_mean(per_sample)
model = build_model()
optimizer = tf.keras.optimizers.Adam()
@tf.function
def train_step(images, labels):
label_len = tf.cast(tf.math.count_nonzero(labels, axis=1), tf.int32)
input_len = tf.fill([tf.shape(images)[0]], tf.shape(model(images))[1])
with tf.GradientTape() as tape:
logits = model(images, training=True)
loss = ctc_loss_fn(labels, logits, input_len, label_len)
gradients = tape.gradient(loss, model.trainable_variables)
optimizer.apply_gradients(zip(gradients, model.trainable_variables))
return loss6. 模型训练
dataset = prepare_dataset('images')
for epoch in range(10):
for batch, (imgs, lbls) in enumerate(dataset):
loss = train_step(imgs, lbls)
print(f"Epoch {epoch + 1}, Loss: {loss.numpy():.4f}")7. 解码预测结果
def decode_prediction(pred):
    """Greedy CTC decode: argmax per step, collapse repeats, drop blanks.

    `pred` is a (batch, time, classes) probability tensor; returns a list
    of decoded strings, one per sample.
    """
    best_path = tf.argmax(pred, axis=-1).numpy()
    decoded = []
    for seq in best_path:
        chars = []
        previous = -1
        for token in seq:
            # Keep a token only when it differs from its predecessor
            # (CTC repeat collapse) and is not the blank.
            if token != previous and token != blank_token:
                chars.append(idx2char.get(token, ''))
            previous = token
        decoded.append(''.join(chars))
    return decoded