验证码识别实战教程

一、项目概述
本教程将带您完整实现一个验证码识别系统，包含数据生成、模型训练和部署的全流程。

二、环境准备
bash
pip install tensorflow pillow numpy flask
三、核心代码实现
3.1 验证码生成器
python
from PIL import Image, ImageDraw, ImageFont
import random
import string
# 更多内容访问 ttocr.com 或联系 1436423940
def generate_captcha(length=4, width=120, height=40):
    """Generate a simple captcha image together with its ground-truth text.

    Args:
        length: Number of characters to draw.
        width: Width of the canvas in pixels.
        height: Height of the canvas in pixels.

    Returns:
        A ``(text, img)`` tuple: the random string that was drawn and the
        RGB ``PIL.Image`` containing it.
    """
    img = Image.new('RGB', (width, height), (255, 255, 255))
    draw = ImageDraw.Draw(img)

    # Pick the random characters (uppercase letters and digits).
    chars = string.ascii_uppercase + string.digits
    text = ''.join(random.choice(chars) for _ in range(length))

    # Arial is not installed on every host; truetype() raises OSError when
    # the font file cannot be found, so fall back to Pillow's bundled font
    # instead of failing before a single sample is produced.
    try:
        font = ImageFont.truetype("arial.ttf", 24)
    except OSError:
        font = ImageFont.load_default()

    # Draw each character on its own transparent tile so that it can be
    # rotated independently before being pasted onto the canvas.
    x = 10
    for ch in text:
        y = random.randint(5, 15)
        angle = random.randint(-15, 15)
        char_img = Image.new('RGBA', (30, 30), (0, 0, 0, 0))
        char_draw = ImageDraw.Draw(char_img)
        char_draw.text((5, 0), ch, font=font, fill=(0, 0, 0))
        char_img = char_img.rotate(angle, expand=1)
        # The tile doubles as its own mask so only the glyph pixels land
        # on the canvas.
        img.paste(char_img, (x, y), char_img)
        x += 25 + random.randint(-3, 3)

    # Add three interference lines with random endpoints and a random
    # colour whose components are each in [0, 180].
    for _ in range(3):
        x1 = random.randint(0, width)
        y1 = random.randint(0, height)
        x2 = random.randint(0, width)
        y2 = random.randint(0, height)
        draw.line(
            [(x1, y1), (x2, y2)],
            fill=(
                random.randint(0, 180),
                random.randint(0, 180),
                random.randint(0, 180),
            ),
            width=1,
        )

    return text, img

3.2 深度学习模型
python
import tensorflow as tf
from tensorflow.keras import layers

def build_model(char_count, captcha_length=4):
    """Build and compile the CNN used for captcha recognition.

    Args:
        char_count: Size of the character alphabet (classes per position).
        captcha_length: Number of character positions in one captcha.

    Returns:
        A compiled ``tf.keras.Sequential`` model that takes inputs of shape
        ``(40, 120, 1)`` and whose output is reshaped to
        ``(captcha_length, char_count)`` before the final softmax layer.
    """
    # Convolutional feature extractor.
    feature_layers = [
        layers.Conv2D(32, (3, 3), activation='relu', input_shape=(40, 120, 1)),
        layers.MaxPooling2D((2, 2)),
        layers.Conv2D(64, (3, 3), activation='relu'),
        layers.MaxPooling2D((2, 2)),
    ]

    # Dense head: one block of char_count scores per captcha position.
    head_layers = [
        layers.Flatten(),
        layers.Dense(128, activation='relu'),
        layers.Dense(char_count * captcha_length),
        layers.Reshape((captcha_length, char_count)),
        layers.Softmax(),
    ]

    network = tf.keras.Sequential(feature_layers + head_layers)
    network.compile(
        optimizer='adam',
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
    )
    return network

3.3 数据预处理
python
import numpy as np

def preprocess_image(image):
    """Turn a captcha image into a normalised single-channel array.

    The image is converted to mode ``'L'`` (grayscale), resized to
    120x40, divided by 255.0 and given a trailing axis of length 1.

    Args:
        image: An object offering PIL-style ``convert`` and ``resize``.

    Returns:
        A float array with one extra trailing dimension.
    """
    grayscale = image.convert('L')
    resized = grayscale.resize((120, 40))
    pixels = np.array(resized) / 255.0
    # Append the channel axis.
    return pixels[..., np.newaxis]

def prepare_dataset(sample_count=1000):
    """Generate a labelled captcha dataset for training.

    Args:
        sample_count: Number of captchas to generate.

    Returns:
        A ``(X, y, char_set)`` tuple: the stacked preprocessed images, the
        per-character class indices for each captcha, and the alphabet
        string used to map characters to indices.
    """
    char_set = string.ascii_uppercase + string.digits
    index_of = dict(zip(char_set, range(len(char_set))))

    # Generate and preprocess one sample at a time, keeping label and
    # pixels paired.
    samples = [
        (label, preprocess_image(picture))
        for label, picture in (generate_captcha() for _ in range(sample_count))
    ]

    X = np.array([pixels for _, pixels in samples])
    y = np.array([[index_of[ch] for ch in label] for label, _ in samples])
    return X, y, char_set

四、训练与评估
4.1 训练流程
python

# Prepare the data
X, y, char_set = prepare_dataset(2000)

# Build the model
model = build_model(len(char_set))

# Train the model
model.fit(X, y, epochs=15, batch_size=32, validation_split=0.2)

# Save the model
model.save('captcha_model.h5')
4.2 预测函数
python
def predict(image_path, model, char_set):
    """Predict the text shown in a captcha image.

    Args:
        image_path: Anything ``PIL.Image.open`` accepts (a filename or a
            binary file object).
        model: A trained model exposing ``predict``.
        char_set: Alphabet string; class index ``i`` maps to ``char_set[i]``.

    Returns:
        The predicted captcha string.
    """
    # Use the image as a context manager so a file opened from a path is
    # released as soon as preprocessing has produced its own array.
    with Image.open(image_path) as img:
        processed = preprocess_image(img)

    # The model expects a batch, so wrap the single sample.
    pred = model.predict(np.array([processed]))
    # For each position take the highest-scoring class index.
    return ''.join(char_set[i] for i in np.argmax(pred[0], axis=1))
五、部署应用
5.1 Flask API服务
python
from flask import Flask, request, jsonify

# Flask needs the import name of this module; the dunder underscores were
# lost from the original line.
app = Flask(__name__)
model = tf.keras.models.load_model('captcha_model.h5')
# Alphabet read by the request handler when decoding predictions. It is the
# same expression prepare_dataset() uses, so class indices line up with the
# trained model.
char_set = string.ascii_uppercase + string.digits

@app.route('/predict', methods=['POST'])
def api_predict():
    """Handle ``POST /predict``: recognise the uploaded captcha image.

    Expects the image under the multipart field ``file``. Responds with
    ``{'result': <text>}`` on success, ``{'error': ...}`` with status 400
    when the field is missing, and ``{'error': ...}`` with status 500 when
    prediction raises.
    """
    upload = request.files.get('file')
    if upload is None:
        return jsonify({'error': 'No file provided'}), 400

    try:
        recognised = predict(upload.stream, model, char_set)
        return jsonify({'result': recognised})
    except Exception as e:
        # Request boundary: report the failure to the caller instead of
        # letting it propagate.
        return jsonify({'error': str(e)}), 500

# Start the development server only when this file is run as a script.
if __name__ == '__main__':
    app.run(host='0.0.0.0', port=5000)

posted @ 2025-05-20 18:47 ttocr、com 阅读(15) 评论(0) 收藏举报

刷新页面返回顶部

验证码识别实战教程

准备数据

构建模型

训练模型

保存模型

公告