使用 TensorFlow 实现图像验证码识别

本项目通过 TensorFlow 实现图像验证码识别系统,包括验证码图像的生成、数据加载、模型构建、训练及预测。

  1. 安装必要库
    确保环境中已经安装了以下库:

pip install tensorflow pillow numpy captcha
2. 生成验证码图像
使用 captcha 库生成验证码数据集。

from captcha.image import ImageCaptcha
import os
import random
import string

# Alphabet the captchas are drawn from: the ten digits followed by A-Z.
characters = f'{string.digits}{string.ascii_uppercase}'
# Number of characters in every captcha.
captcha_length = 4

def generate_images(output_dir='captcha_data', num=5000):
    """Write ``num`` random captcha PNG files into ``output_dir``.

    The directory is created when it does not exist. Every captcha is a
    160x60 image whose text is ``captcha_length`` characters sampled (with
    replacement) from ``characters``. Files are named ``<text>_<index>.png``,
    so the text is encoded in the file name.
    """
    os.makedirs(output_dir, exist_ok=True)
    captcha_maker = ImageCaptcha(width=160, height=60)
    for index in range(num):
        text = ''.join(random.choices(characters, k=captcha_length))
        target = os.path.join(output_dir, f'{text}_{index}.png')
        captcha_maker.generate_image(text).save(target)
# Create the on-disk dataset with the default settings
# (5000 images written to captcha_data/).
generate_images()
3. 构建数据加载器
定义用于加载图像及标签的数据生成器。

import numpy as np
from PIL import Image
import tensorflow as tf

# Character -> class index, following the order of ``characters``.
char_to_idx = dict(zip(characters, range(len(characters))))
# Class index -> character, the inverse of ``char_to_idx``.
idx_to_char = {index: char for char, index in char_to_idx.items()}

def parse_image(filename):
    """Load one captcha image and derive its label from the file name.

    Args:
        filename: Path of a ``<text>_<index>.png`` file. May be an eager
            string tensor (as passed by ``tf.py_function``), ``bytes`` or
            ``str``.

    Returns:
        A tuple ``(image, label)``:
        ``image`` is a float32 array of shape (60, 160, 3) scaled to [0, 1];
        ``label`` is an int32 array holding the ``char_to_idx`` index of each
        character of the text before the first underscore.

    Raises:
        KeyError: If the file name contains a character that is not in
            ``char_to_idx``.
    """
    # tf.py_function hands the path over as an eager tensor; unwrap it.
    if hasattr(filename, 'numpy'):
        filename = filename.numpy()
    if isinstance(filename, bytes):
        filename = filename.decode()

    # The captcha text is the part of the base name before the first '_'.
    label_str = os.path.basename(filename).split('_')[0]
    label = np.array([char_to_idx[c] for c in label_str], dtype=np.int32)

    # Close the file handle deterministically once the pixels are decoded.
    with Image.open(filename) as img:
        resized = img.convert('RGB').resize((160, 60))

    # Explicit float32 so the result matches the dtype the dataset declares.
    image = np.asarray(resized, dtype=np.float32) / 255.0
    return image, label

def load_dataset(folder, batch_size=64):
    """Build a batched ``tf.data.Dataset`` from the PNG files in ``folder``.

    Args:
        folder: Directory whose ``*.png`` files are loaded via
            ``parse_image``.
        batch_size: Number of samples per batch. Defaults to 64.

    Returns:
        A dataset yielding ``(images, labels)`` batches, where images are
        float32 with per-sample shape (60, 160, 3) and labels are int32 with
        per-sample shape (captcha_length,). Prefetching is enabled with
        ``tf.data.AUTOTUNE``.
    """
    files = [os.path.join(folder, f) for f in os.listdir(folder) if f.endswith('.png')]

    def gen():
        # Decode lazily, one file at a time, as the dataset is iterated.
        for path in files:
            image, label = tf.py_function(parse_image, inp=[path], Tout=(tf.float32, tf.int32))
            yield image, label

    signature = (
        tf.TensorSpec(shape=(60, 160, 3), dtype=tf.float32),
        tf.TensorSpec(shape=(captcha_length,), dtype=tf.int32),
    )
    dataset = tf.data.Dataset.from_generator(gen, output_signature=signature)
    return dataset.batch(batch_size).prefetch(tf.data.AUTOTUNE)

# Batched dataset built from the PNG files in the captcha_data directory.
train_dataset = load_dataset('captcha_data')
4. 构建模型
使用卷积层和LSTM处理图像并输出预测结果。

from tensorflow.keras import layers, models

def build_model():
    """Build the CNN + bidirectional-LSTM captcha recogniser.

    Three convolutions (with two 2x2 poolings) reduce the (60, 160, 3) input
    to a (15, 40, 128) feature map. The map is read column by column by a
    bidirectional LSTM, and the 40 column steps are then folded into
    ``captcha_length`` groups so the model emits exactly one softmax
    distribution per captcha character.

    Returns:
        An uncompiled Keras model whose output has shape
        (batch, captcha_length, len(characters)).

    Raises:
        ValueError: If the 40 feature-map columns cannot be split evenly
            into ``captcha_length`` groups.
    """
    width_steps = 40  # 160 input columns halved by each of the two poolings
    if width_steps % captcha_length:
        raise ValueError(
            f'captcha_length ({captcha_length}) must divide {width_steps}'
        )

    inputs = layers.Input(shape=(60, 160, 3))
    x = layers.Conv2D(32, 3, activation='relu', padding='same')(inputs)
    x = layers.MaxPooling2D(2)(x)
    x = layers.Conv2D(64, 3, activation='relu', padding='same')(x)
    x = layers.MaxPooling2D(2)(x)
    x = layers.Conv2D(128, 3, activation='relu', padding='same')(x)

    # The feature map is (height=15, width=40, channels=128). Move width to
    # the front so each time step holds one full image column; reshaping
    # without this permutation would mix rows and columns.
    x = layers.Permute((2, 1, 3))(x)
    x = layers.Reshape((width_steps, 15 * 128))(x)
    x = layers.Bidirectional(layers.LSTM(128, return_sequences=True))(x)

    # Fold consecutive column steps into one slot per character so the output
    # length matches the (captcha_length,) integer labels used by the loss.
    # The bidirectional LSTM emits 2 * 128 features per step.
    steps_per_char = width_steps // captcha_length
    x = layers.Reshape((captcha_length, steps_per_char * 2 * 128))(x)
    x = layers.TimeDistributed(layers.Dense(len(characters), activation='softmax'))(x)
    return models.Model(inputs, x)

# Instantiate the network and configure training: Adam optimizer, sparse
# categorical cross-entropy loss, and accuracy as the reported metric.
model = build_model()
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
5. 训练模型
对模型进行训练。

def split_labels(y):
    """Return the label columns of ``y`` as a list.

    ``y`` is indexed as ``y[:, position]`` for every position in
    ``range(captcha_length)``; the resulting slices are returned in order.
    """
    return [y[:, position] for position in range(captcha_length)]

# Train for 15 epochs. The map splits each label batch into per-position
# columns and stacks them back along axis 1 before it is fed to the model.
model.fit(
train_dataset.map(lambda x, y: (x, tf.stack(split_labels(y), axis=1))),
epochs=15
)
6. 验证模型
对单张图像进行预测。

def predict_image(model, path):
    """Predict the text of a single captcha image.

    Args:
        model: Model whose ``predict`` output for one image is a sequence of
            per-step class-probability vectors.
        path: Path of the image file to decode.

    Returns:
        The string formed by mapping the arg-max class of every output step
        through ``idx_to_char``.
    """
    # Close the file handle as soon as the pixels have been decoded.
    with Image.open(path) as img:
        resized = img.convert('RGB').resize((160, 60))

    # Scale to [0, 1] as float32 and add the leading batch axis.
    batch = np.expand_dims(np.asarray(resized, dtype=np.float32) / 255.0, axis=0)
    preds = model.predict(batch)[0]
    best_classes = np.argmax(preds, axis=-1)
    return ''.join(idx_to_char[int(index)] for index in best_classes)

# Captcha texts are random, so a fixed file name is not guaranteed to exist.
# Use the first PNG (in sorted order) that is actually present in the dataset.
test_img = next(
    os.path.join('captcha_data', name)
    for name in sorted(os.listdir('captcha_data'))
    if name.endswith('.png')
)
print('预测结果:', predict_image(model, test_img))

posted @ 2025-04-29 22:43  ttocr、com  阅读(34)  评论(0)    收藏  举报