验证码识别实战教程
一、项目概述
本教程将带您完整实现一个验证码识别系统,包含数据生成、模型训练和部署的全流程。
二、环境准备
bash
pip install tensorflow pillow numpy flask
三、核心代码实现
3.1 验证码生成器
python
from PIL import Image, ImageDraw, ImageFont
import random
import string
# 更多内容访问ttocr.com或联系1436423940
def generate_captcha(length=4, width=120, height=40):
    """Generate a basic text CAPTCHA image.

    Draws ``length`` random characters (uppercase ASCII letters and digits)
    in black on a white RGB canvas, each rotated by a random angle between
    -15 and 15 degrees, then overlays three random interference lines.

    Args:
        length: Number of characters to draw.
        width: Canvas width in pixels.
        height: Canvas height in pixels.

    Returns:
        A ``(text, img)`` tuple: the generated string and the PIL image.
    """
    img = Image.new('RGB', (width, height), (255, 255, 255))
    draw = ImageDraw.Draw(img)

    # Pick the random characters.
    chars = string.ascii_uppercase + string.digits
    text = ''.join(random.choice(chars) for _ in range(length))

    # "arial.ttf" may not be installed on every system; fall back to
    # Pillow's built-in font instead of failing with OSError.
    try:
        font = ImageFont.truetype("arial.ttf", 24)
    except OSError:
        font = ImageFont.load_default()

    # Draw the characters left to right. The horizontal step is fixed at
    # about 25 px and does not depend on ``width``.
    x = 10
    for ch in text:
        y = random.randint(5, 15)
        angle = random.randint(-15, 15)
        # Render each character on its own transparent tile so it can be
        # rotated independently of the others.
        char_img = Image.new('RGBA', (30, 30), (0, 0, 0, 0))
        char_draw = ImageDraw.Draw(char_img)
        char_draw.text((5, 0), ch, font=font, fill=(0, 0, 0))
        char_img = char_img.rotate(angle, expand=1)
        # The tile is passed again as the mask argument so only its
        # non-transparent pixels are pasted onto the canvas.
        img.paste(char_img, (x, y), char_img)
        x += 25 + random.randint(-3, 3)

    # Add the interference lines.
    for _ in range(3):
        x1 = random.randint(0, width)
        y1 = random.randint(0, height)
        x2 = random.randint(0, width)
        y2 = random.randint(0, height)
        line_color = (
            random.randint(0, 180),
            random.randint(0, 180),
            random.randint(0, 180),
        )
        draw.line([(x1, y1), (x2, y2)], fill=line_color, width=1)

    return text, img
3.2 深度学习模型
python
import tensorflow as tf
from tensorflow.keras import layers
def build_model(char_count, captcha_length=4):
    """Build and compile the CNN recognition model.

    The network takes inputs of shape (40, 120, 1), applies two
    convolution + max-pooling stages and a 128-unit dense layer, and ends
    with a dense layer of ``char_count * captcha_length`` units that is
    reshaped to ``(captcha_length, char_count)`` and passed through a
    Softmax layer.

    Args:
        char_count: Number of distinct characters in the alphabet.
        captcha_length: Number of characters in each CAPTCHA.

    Returns:
        The compiled ``tf.keras.Sequential`` model (Adam optimizer,
        sparse categorical cross-entropy loss, accuracy metric).
    """
    net = tf.keras.Sequential()

    # Convolutional feature extractor.
    net.add(layers.Conv2D(32, (3, 3), activation='relu',
                          input_shape=(40, 120, 1)))
    net.add(layers.MaxPooling2D((2, 2)))
    net.add(layers.Conv2D(64, (3, 3), activation='relu'))
    net.add(layers.MaxPooling2D((2, 2)))

    # Classifier head: one score per (position, character) pair.
    net.add(layers.Flatten())
    net.add(layers.Dense(128, activation='relu'))
    net.add(layers.Dense(char_count * captcha_length))
    net.add(layers.Reshape((captcha_length, char_count)))
    net.add(layers.Softmax())

    net.compile(
        optimizer='adam',
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
    )
    return net
3.3 数据预处理
python
import numpy as np
def preprocess_image(image, size=(120, 40)):
    """Convert an image into a normalized single-channel array.

    Args:
        image: A PIL image (any object providing ``convert`` and
            ``resize`` and convertible with ``np.array``).
        size: Target ``(width, height)`` passed to ``resize``. The default
            matches the 120x40 images the rest of this tutorial uses.

    Returns:
        An array of the pixel values divided by 255.0, with a trailing
        channel axis of length 1 appended.
    """
    img = image.convert('L')  # convert to grayscale
    img = img.resize(size)
    arr = np.array(img) / 255.0
    return np.expand_dims(arr, axis=-1)
def prepare_dataset(sample_count=1000):
    """Generate a labelled training set of synthetic CAPTCHAs.

    Args:
        sample_count: Number of CAPTCHA samples to generate.

    Returns:
        A tuple ``(X, y, char_set)``: ``X`` is the array of preprocessed
        images, ``y`` holds each text's characters as indices into
        ``char_set``, and ``char_set`` is the alphabet string (uppercase
        ASCII letters followed by digits).
    """
    alphabet = string.ascii_uppercase + string.digits
    index_of = dict(zip(alphabet, range(len(alphabet))))

    samples = []
    labels = []
    for _ in range(sample_count):
        captcha_text, captcha_img = generate_captcha()
        # Encode the label right away as a list of alphabet indices.
        labels.append([index_of[ch] for ch in captcha_text])
        samples.append(preprocess_image(captcha_img))

    X = np.array(samples)
    y = np.array(labels)
    return X, y, alphabet
四、训练与评估
4.1 训练流程
python
# Prepare the data: 2000 synthetic CAPTCHAs plus the alphabet used for labels.
X, y, char_set = prepare_dataset(2000)
# Build the model with one output class per character in the alphabet.
model = build_model(len(char_set))
# Train the model, holding out 20% of the samples for validation.
model.fit(X, y, epochs=15, batch_size=32, validation_split=0.2)
# Save the model; the Flask service loads it from this same path.
model.save('captcha_model.h5')
4.2 预测函数
python
def predict(image_path, model, char_set):
    """Predict the text of a CAPTCHA image.

    Args:
        image_path: Path or file-like object accepted by ``Image.open``
            (the Flask handler passes an upload stream).
        model: Trained model whose ``predict`` output has one row of
            per-character scores for each CAPTCHA position.
        char_set: Alphabet string used to map predicted indices back to
            characters.

    Returns:
        The predicted CAPTCHA text.
    """
    # Preprocess inside the ``with`` block: the pixel data must be read
    # before the image is closed, and the block guarantees it is closed.
    with Image.open(image_path) as img:
        processed = preprocess_image(img)
    # The model expects a batch, so wrap the single sample in an array.
    pred = model.predict(np.array([processed]))
    return ''.join([char_set[i] for i in np.argmax(pred[0], axis=1)])
五、部署应用
5.1 Flask API服务
python
from flask import Flask, request, jsonify
# Flask needs the module's import name; the bare ``name`` in the original
# text is an undefined variable (the double underscores were lost).
app = Flask(__name__)
# Load the trained model once at start-up so every request reuses it.
model = tf.keras.models.load_model('captcha_model.h5')
# Alphabet used to decode predictions in the /predict handler. It is the
# same expression prepare_dataset() uses to build its label alphabet.
char_set = string.ascii_uppercase + string.digits
@app.route('/predict', methods=['POST'])
def api_predict():
    """Handle ``POST /predict``: recognize an uploaded CAPTCHA image.

    Expects a multipart upload under the form field ``file``.

    Returns:
        ``{"result": <text>}`` on success; ``{"error": ...}`` with status
        400 when no file was supplied, or status 500 when prediction fails.
    """
    if 'file' not in request.files:
        return jsonify({'error': 'No file provided'}), 400
    file = request.files['file']
    try:
        result = predict(file.stream, model, char_set)
    except Exception as e:
        # Top-level request boundary: record the traceback server-side so
        # failures are not lost, then report the error to the client.
        # NOTE(review): str(e) may expose internal details to callers.
        app.logger.exception('CAPTCHA prediction failed')
        return jsonify({'error': str(e)}), 500
    else:
        return jsonify({'result': result})
# Start the server only when this file is executed as a script. The
# original ``name == 'main'`` had lost its double underscores and would
# raise NameError.
if __name__ == '__main__':
    # NOTE(review): host 0.0.0.0 listens on all network interfaces.
    app.run(host='0.0.0.0', port=5000)
浙公网安备 33010602011771号