使用 Keras 构建验证码识别系统(CNN + LSTM)

本教程介绍如何使用 Keras(TensorFlow)搭建一个用于识别图像验证码的深度学习模型,采用 CNN 提取图像特征,LSTM 建模字符序列,最后输出预测结果。

  1. 安装依赖

pip install tensorflow pillow numpy captcha

2. 生成验证码数据集

    from captcha.image import ImageCaptcha
    import string, random, os
    from PIL import Image
    # Alphabet the CAPTCHA text is drawn from: digits followed by uppercase letters.
    characters = ''.join((string.digits, string.ascii_uppercase))
    # Number of characters in every generated CAPTCHA.
    captcha_length = 4
    # Size of the rendered CAPTCHA images.
    img_width = 160
    img_height = 60
    def generate_captcha(output_dir='keras_captcha', num_samples=5000):
        """Render random CAPTCHA images and save them as PNG files.

        Each image shows ``captcha_length`` characters drawn at random from
        ``characters`` and is written to ``<output_dir>/<text>_<index>.png``.

        Args:
            output_dir: Directory that receives the images; created if missing.
            num_samples: Number of images to generate.
        """
        os.makedirs(output_dir, exist_ok=True)
        generator = ImageCaptcha(width=img_width, height=img_height)
        for i in range(num_samples):
            text = ''.join(random.choices(characters, k=captcha_length))
            image = generator.generate_image(text)
            # The underscore keeps the label apart from the running index
            # (labels may themselves contain digits) and matches the
            # 'Z9C4_12.png' naming used by the inference example.
            image.save(f"{output_dir}/{text}_{i}.png")
    generate_captcha()

3. 构建数据加载器
    import tensorflow as tf
    import numpy as np
    # Class index of every symbol in the alphabet, in alphabet order.
    char_to_idx = dict(zip(characters, range(len(characters))))
    # Reverse lookup: class index back to its symbol.
    idx_to_char = dict(zip(char_to_idx.values(), char_to_idx.keys()))
    def parse_image(filename):
        """Load one CAPTCHA image and derive its label from the file name.

        Meant to run eagerly (``create_dataset`` wraps it in
        ``tf.py_function``), so the path can be read back as a Python string.

        Args:
            filename: Path of a PNG whose base name starts with the CAPTCHA
                text, e.g. ``Z9C4_12.png``.

        Returns:
            A ``(image, label)`` pair: the float32 image resized to
            ``(img_height, img_width)`` with 3 channels, and an int32 array
            with one class index per character.

        Raises:
            KeyError: If the file name prefix contains a character that is
                not in ``characters``.
        """
        path = tf.convert_to_tensor(filename).numpy().decode()
        # The label is always the first `captcha_length` characters of the
        # base name, so a fixed-width slice works whether or not a separator
        # follows the label.
        label_str = os.path.basename(path)[:captcha_length]
        label = np.array([char_to_idx[c] for c in label_str], dtype=np.int32)

        image = tf.io.read_file(filename)
        image = tf.image.decode_png(image, channels=3)
        image = tf.image.convert_image_dtype(image, tf.float32)
        image = tf.image.resize(image, [img_height, img_width])
        return image, label


    def create_dataset(data_dir, batch_size=64):
        """Build a shuffled, batched ``tf.data`` pipeline over ``data_dir``.

        Args:
            data_dir: Directory containing the ``*.png`` CAPTCHA images.
            batch_size: Number of samples per batch.

        Returns:
            A ``tf.data.Dataset`` yielding ``(images, labels)`` batches, where
            each label row holds ``captcha_length`` class indices.
        """
        files = tf.data.Dataset.list_files(data_dir + '/*.png')
        dataset = files.map(
            lambda f: tf.py_function(parse_image, [f], [tf.float32, tf.int32]),
            num_parallel_calls=tf.data.AUTOTUNE)

        def _restore_shapes(img, label):
            # Tensors coming out of tf.py_function carry no static shape;
            # pin both so downstream layers see fixed dimensions.
            img = tf.ensure_shape(img, [img_height, img_width, 3])
            label = tf.reshape(label, [captcha_length])
            return img, label

        dataset = dataset.map(_restore_shapes)
        return dataset.shuffle(1000).batch(batch_size).prefetch(tf.data.AUTOTUNE)
4. 构建模型
    from tensorflow.keras import layers, models
    def build_model():
        """Build the CNN + bidirectional-LSTM CAPTCHA classifier.

        Three Conv2D/MaxPooling2D stages extract image features, two
        bidirectional LSTM layers summarise them, and one softmax head per
        character position (named ``char_0`` .. ``char_{captcha_length-1}``)
        predicts that position's class.

        Returns:
            An uncompiled Keras ``Model`` taking ``(img_height, img_width, 3)``
            images and returning a list of ``captcha_length`` outputs, each
            over ``len(characters)`` classes.
        """
        inputs = layers.Input(shape=(img_height, img_width, 3))

        x = inputs
        for filters in (32, 64, 128):
            x = layers.Conv2D(filters, 3, activation='relu', padding='same')(x)
            x = layers.MaxPooling2D()(x)

        # The characters are laid out left to right, so the LSTM time axis
        # must be the image width: (H, W, C) -> (W, H, C) -> (W, H * C).
        x = layers.Permute((2, 1, 3))(x)
        x = layers.Reshape((x.shape[1], x.shape[2] * x.shape[3]))(x)
        x = layers.Bidirectional(layers.LSTM(128, return_sequences=True))(x)
        x = layers.Bidirectional(layers.LSTM(128))(x)

        outputs = [
            layers.Dense(len(characters), activation='softmax', name=f'char_{i}')(x)
            for i in range(captcha_length)
        ]
        return models.Model(inputs=inputs, outputs=outputs)


    model = build_model()
# Configure training for the multi-output model. The single loss string is
# shared by all heads; metrics are given as one list per output head, the
# explicit form documented for multi-output models.
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=[['accuracy'] for _ in range(captcha_length)],
)
5. 训练模型

train_ds = create_dataset('keras_captcha', batch_size=64)
# The model has one output head per character, while the dataset yields a
# single (batch, captcha_length) label tensor: split it into one target per
# head. A tuple is used because tf.data treats tuples, not lists, as structure.
train_ds = train_ds.map(
    lambda img, label: (img, tuple(tf.unstack(label, axis=-1))))
# No steps_per_epoch: the dataset is finite and not repeated, and the default
# 5000 images give only 79 batches of 64, fewer than the 100 steps previously
# requested. Each epoch is now one full pass over the data.
model.fit(train_ds, epochs=10)
6. 推理函数
import numpy as np
def predict_image(model, path):
    """Predict the text shown in a single CAPTCHA image.

    Args:
        model: Model returning one class-probability array per character
            position, as built by ``build_model``.
        path: Path to a PNG image.

    Returns:
        The predicted text, one character per model output.
    """
    img = tf.io.read_file(path)
    img = tf.image.decode_png(img, channels=3)
    img = tf.image.convert_image_dtype(img, tf.float32)
    img = tf.image.resize(img, [img_height, img_width])
    img = tf.expand_dims(img, axis=0)  # add the batch dimension

    preds = model.predict(img)
    # Each entry of `preds` belongs to one character position; with a batch
    # of one, the flat argmax is that position's class index.
    pred_chars = [idx_to_char[int(np.argmax(p))] for p in preds]
    return ''.join(pred_chars)


print(predict_image(model, 'keras_captcha/Z9C4_12.png'))

posted @ 2025-05-01 17:28  ttocr、com  阅读(25)  评论(0)    收藏  举报