用Python识别验证码

  1. 准备工作
    安装必备库:

bash
pip install tensorflow opencv-python numpy matplotlib captcha scikit-learn
2. 生成验证码样本
python
from captcha.image import ImageCaptcha
import random
import string
# 网站地址 www.tmocr.com 或联系 q1092685548

# 生成4位随机验证码(数字+大写字母)

def generate_captcha():
    """Create one random captcha.

    Returns:
        A ``(text, image)`` tuple: the 4-character label drawn from digits
        and uppercase letters, and the image object rendered for that label
        by ``ImageCaptcha().generate_image``.
    """
    alphabet = string.digits + string.ascii_uppercase
    # Draw the four characters one at a time, each uniformly from the alphabet.
    picked = [random.choice(alphabet) for _ in range(4)]
    label = ''.join(picked)
    rendered = ImageCaptcha().generate_image(label)
    return label, rendered

# 保存1000个样本

import os

# Saving into a directory that does not exist fails, so create it up front;
# exist_ok=True keeps re-runs from raising.
os.makedirs('captchas', exist_ok=True)

# Generate and store 1000 samples. The file name doubles as the label, so two
# samples that happen to share the same text overwrite each other.
for _ in range(1000):
    text, img = generate_captcha()
    img.save(f'captchas/{text}.png')
3. 数据预处理
python
import cv2
import numpy as np

def preprocess(img_path):
    """Load a captcha image and convert it into a normalized model input.

    Args:
        img_path: Path of the image file to read.

    Returns:
        A float array of shape (60, 160, 1) with values in [0, 1].

    Raises:
        FileNotFoundError: If OpenCV cannot read an image from ``img_path``.
    """
    # Read the file directly as a single-channel grayscale image.
    img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
    # cv2.imread reports failure by returning None instead of raising; fail
    # here with a clear message rather than deep inside cv2.threshold.
    if img is None:
        raise FileNotFoundError(f'Cannot read image: {img_path}')
    # Inverted binary threshold; the threshold value itself is chosen by Otsu.
    _, img = cv2.threshold(img, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
    # Scale pixel values from [0, 255] to [0, 1].
    img = img / 255.0
    # cv2.resize takes (width, height), so the result has shape (60, 160).
    img = cv2.resize(img, (160, 60))
    # Append the channel axis: (60, 160) -> (60, 160, 1).
    img = np.expand_dims(img, axis=-1)
    return img
4. 构建深度学习模型
python
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Reshape, Bidirectional, LSTM, Dense

def build_model(input_shape=(60, 160, 1), num_chars=4, num_classes=36):
    """Build a CNN + bidirectional-LSTM model with one softmax per character.

    The convolutional feature map is read column by column (left to right) as
    a sequence, passed through two bidirectional LSTMs, and the resulting
    steps are grouped into ``num_chars`` equal slices, each classified into
    ``num_classes`` classes.

    Args:
        input_shape: ``(height, width, channels)`` of the input image.
        num_chars: Number of characters in one captcha.
        num_classes: Size of the alphabet (10 digits + 26 letters by default).

    Returns:
        An uncompiled Keras ``Model`` mapping ``input_shape`` images to an
        output of shape ``(num_chars, num_classes)`` per sample.

    Raises:
        ValueError: If the pooled width cannot be split evenly into
            ``num_chars`` slices.
    """
    # Imported locally because the file-level import list does not include it.
    from tensorflow.keras.layers import Permute

    height, width, _ = input_shape
    # Two 2x2 poolings shrink each spatial dimension by a factor of 4.
    feat_h, feat_w = height // 4, width // 4
    if feat_w % num_chars != 0:
        raise ValueError(
            f'pooled width {feat_w} is not divisible by num_chars={num_chars}'
        )

    # Input layer (named `inputs` to avoid shadowing the builtin `input`).
    inputs = Input(shape=input_shape)

    # CNN part
    x = Conv2D(32, (3, 3), activation='relu', padding='same')(inputs)
    x = MaxPooling2D((2, 2))(x)
    x = Conv2D(64, (3, 3), activation='relu', padding='same')(x)
    x = MaxPooling2D((2, 2))(x)

    # Convert to a sequence: (h, w, c) -> (w, h, c) -> (w, h * c). The target
    # shape must hold exactly h * w * 64 elements or Reshape raises.
    x = Permute((2, 1, 3))(x)
    x = Reshape((feat_w, feat_h * 64))(x)

    # RNN part
    x = Bidirectional(LSTM(128, return_sequences=True))(x)
    x = Bidirectional(LSTM(128, return_sequences=True))(x)

    # Group consecutive steps so there is one slice per character; each step
    # carries 2 * 128 features from the bidirectional LSTM.
    x = Reshape((num_chars, (feat_w // num_chars) * 2 * 128))(x)

    # Output layer: one probability distribution over the alphabet per character.
    outputs = Dense(num_classes, activation='softmax')(x)

    return Model(inputs=inputs, outputs=outputs)

# Instantiate the network and configure it for training with per-class
# cross-entropy, the Adam optimizer and accuracy reporting.
model = build_model()
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
5. 训练模型
python
import os
from sklearn.model_selection import train_test_split

# 加载数据

# Alphabet used by the generated captchas: 10 digits followed by 26 uppercase letters.
CHARSET = '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ'
CAPTCHA_LENGTH = 4
CHAR_TO_INDEX = {ch: idx for idx, ch in enumerate(CHARSET)}


def encode_label(text):
    """One-hot encode a captcha string as a (len(text), len(CHARSET)) array."""
    onehot = np.zeros((len(text), len(CHARSET)), dtype=np.float32)
    for pos, ch in enumerate(text):
        onehot[pos, CHAR_TO_INDEX[ch]] = 1.0
    return onehot


# Load the data: every PNG in `captchas` is one sample whose file name
# (without extension) is its label.
X = []
y = []
for filename in os.listdir('captchas'):
    if filename.endswith('.png'):
        text = filename.split('.')[0]
        # Skip files whose name is not a valid label; a single odd file would
        # otherwise break encoding or produce a ragged label array.
        if len(text) != CAPTCHA_LENGTH or any(ch not in CHAR_TO_INDEX for ch in text):
            continue
        X.append(preprocess(f'captchas/{filename}'))
        # categorical_crossentropy needs numeric one-hot targets, not raw strings.
        y.append(encode_label(text))

# Split into training and test sets (80% / 20%).
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

# Start training.
model.fit(np.array(X_train), np.array(y_train),
          validation_data=(np.array(X_test), np.array(y_test)),
          epochs=20,
          batch_size=32)
6. 使用模型识别验证码
python
def decode_predictions(probs, charset='0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ'):
    """Turn per-position class probabilities into text.

    Args:
        probs: Array of shape (positions, classes); each row holds the scores
            of one output position.
        charset: Characters ordered by class index (digits, then uppercase
            letters by default).

    Returns:
        The string formed by the highest-scoring character at each position.
    """
    indices = np.argmax(probs, axis=-1)
    return ''.join(charset[int(i)] for i in indices)


def predict_captcha(img_path):
    """Recognize the captcha stored at ``img_path`` and return its text.

    Args:
        img_path: Path of the captcha image to recognize.

    Returns:
        The decoded text, one character per model output position.
    """
    img = preprocess(img_path)
    img = np.expand_dims(img, axis=0)  # add the batch dimension
    pred = model.predict(img)
    # pred has a leading batch axis of size 1; decode the only sample in it.
    return decode_predictions(pred[0])

# 测试识别效果

print(predict_captcha('test_captcha.png'))

用Python破解验证码

  1. 准备工作
    安装必备库:

bash
pip install tensorflow opencv-python numpy matplotlib captcha scikit-learn
2. 生成验证码样本
python
from captcha.image import ImageCaptcha
import random
import string

# 生成4位随机验证码(数字+大写字母)

def generate_captcha():
    """Create one random captcha.

    Returns:
        A ``(text, image)`` tuple: the 4-character label drawn from digits
        and uppercase letters, and the image object rendered for that label
        by ``ImageCaptcha().generate_image``.
    """
    alphabet = string.digits + string.ascii_uppercase
    # Draw the four characters one at a time, each uniformly from the alphabet.
    picked = [random.choice(alphabet) for _ in range(4)]
    label = ''.join(picked)
    rendered = ImageCaptcha().generate_image(label)
    return label, rendered

# 保存1000个样本

import os

# Saving into a directory that does not exist fails, so create it up front;
# exist_ok=True keeps re-runs from raising.
os.makedirs('captchas', exist_ok=True)

# Generate and store 1000 samples. The file name doubles as the label, so two
# samples that happen to share the same text overwrite each other.
for _ in range(1000):
    text, img = generate_captcha()
    img.save(f'captchas/{text}.png')
3. 数据预处理
python
import cv2
import numpy as np

def preprocess(img_path):
    """Load a captcha image and convert it into a normalized model input.

    Args:
        img_path: Path of the image file to read.

    Returns:
        A float array of shape (60, 160, 1) with values in [0, 1].

    Raises:
        FileNotFoundError: If OpenCV cannot read an image from ``img_path``.
    """
    # Read the file directly as a single-channel grayscale image.
    img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
    # cv2.imread reports failure by returning None instead of raising; fail
    # here with a clear message rather than deep inside cv2.threshold.
    if img is None:
        raise FileNotFoundError(f'Cannot read image: {img_path}')
    # Inverted binary threshold; the threshold value itself is chosen by Otsu.
    _, img = cv2.threshold(img, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
    # Scale pixel values from [0, 255] to [0, 1].
    img = img / 255.0
    # cv2.resize takes (width, height), so the result has shape (60, 160).
    img = cv2.resize(img, (160, 60))
    # Append the channel axis: (60, 160) -> (60, 160, 1).
    img = np.expand_dims(img, axis=-1)
    return img
4. 构建深度学习模型
python
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Reshape, Bidirectional, LSTM, Dense

def build_model(input_shape=(60, 160, 1), num_chars=4, num_classes=36):
    """Build a CNN + bidirectional-LSTM model with one softmax per character.

    The convolutional feature map is read column by column (left to right) as
    a sequence, passed through two bidirectional LSTMs, and the resulting
    steps are grouped into ``num_chars`` equal slices, each classified into
    ``num_classes`` classes.

    Args:
        input_shape: ``(height, width, channels)`` of the input image.
        num_chars: Number of characters in one captcha.
        num_classes: Size of the alphabet (10 digits + 26 letters by default).

    Returns:
        An uncompiled Keras ``Model`` mapping ``input_shape`` images to an
        output of shape ``(num_chars, num_classes)`` per sample.

    Raises:
        ValueError: If the pooled width cannot be split evenly into
            ``num_chars`` slices.
    """
    # Imported locally because the file-level import list does not include it.
    from tensorflow.keras.layers import Permute

    height, width, _ = input_shape
    # Two 2x2 poolings shrink each spatial dimension by a factor of 4.
    feat_h, feat_w = height // 4, width // 4
    if feat_w % num_chars != 0:
        raise ValueError(
            f'pooled width {feat_w} is not divisible by num_chars={num_chars}'
        )

    # Input layer (named `inputs` to avoid shadowing the builtin `input`).
    inputs = Input(shape=input_shape)

    # CNN part
    x = Conv2D(32, (3, 3), activation='relu', padding='same')(inputs)
    x = MaxPooling2D((2, 2))(x)
    x = Conv2D(64, (3, 3), activation='relu', padding='same')(x)
    x = MaxPooling2D((2, 2))(x)

    # Convert to a sequence: (h, w, c) -> (w, h, c) -> (w, h * c). The target
    # shape must hold exactly h * w * 64 elements or Reshape raises.
    x = Permute((2, 1, 3))(x)
    x = Reshape((feat_w, feat_h * 64))(x)

    # RNN part
    x = Bidirectional(LSTM(128, return_sequences=True))(x)
    x = Bidirectional(LSTM(128, return_sequences=True))(x)

    # Group consecutive steps so there is one slice per character; each step
    # carries 2 * 128 features from the bidirectional LSTM.
    x = Reshape((num_chars, (feat_w // num_chars) * 2 * 128))(x)

    # Output layer: one probability distribution over the alphabet per character.
    outputs = Dense(num_classes, activation='softmax')(x)

    return Model(inputs=inputs, outputs=outputs)

# Instantiate the network and configure it for training with per-class
# cross-entropy, the Adam optimizer and accuracy reporting.
model = build_model()
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
5. 训练模型
python
import os
from sklearn.model_selection import train_test_split

# 加载数据

# Alphabet used by the generated captchas: 10 digits followed by 26 uppercase letters.
CHARSET = '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ'
CAPTCHA_LENGTH = 4
CHAR_TO_INDEX = {ch: idx for idx, ch in enumerate(CHARSET)}


def encode_label(text):
    """One-hot encode a captcha string as a (len(text), len(CHARSET)) array."""
    onehot = np.zeros((len(text), len(CHARSET)), dtype=np.float32)
    for pos, ch in enumerate(text):
        onehot[pos, CHAR_TO_INDEX[ch]] = 1.0
    return onehot


# Load the data: every PNG in `captchas` is one sample whose file name
# (without extension) is its label.
X = []
y = []
for filename in os.listdir('captchas'):
    if filename.endswith('.png'):
        text = filename.split('.')[0]
        # Skip files whose name is not a valid label; a single odd file would
        # otherwise break encoding or produce a ragged label array.
        if len(text) != CAPTCHA_LENGTH or any(ch not in CHAR_TO_INDEX for ch in text):
            continue
        X.append(preprocess(f'captchas/{filename}'))
        # categorical_crossentropy needs numeric one-hot targets, not raw strings.
        y.append(encode_label(text))

# Split into training and test sets (80% / 20%).
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

# Start training.
model.fit(np.array(X_train), np.array(y_train),
          validation_data=(np.array(X_test), np.array(y_test)),
          epochs=20,
          batch_size=32)
6. 使用模型识别验证码
python
def decode_predictions(probs, charset='0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ'):
    """Turn per-position class probabilities into text.

    Args:
        probs: Array of shape (positions, classes); each row holds the scores
            of one output position.
        charset: Characters ordered by class index (digits, then uppercase
            letters by default).

    Returns:
        The string formed by the highest-scoring character at each position.
    """
    indices = np.argmax(probs, axis=-1)
    return ''.join(charset[int(i)] for i in indices)


def predict_captcha(img_path):
    """Recognize the captcha stored at ``img_path`` and return its text.

    Args:
        img_path: Path of the captcha image to recognize.

    Returns:
        The decoded text, one character per model output position.
    """
    img = preprocess(img_path)
    img = np.expand_dims(img, axis=0)  # add the batch dimension
    pred = model.predict(img)
    # pred has a leading batch axis of size 1; decode the only sample in it.
    return decode_predictions(pred[0])

# 测试识别效果

# Run the recognizer on a sample image and show the decoded text.
recognized_text = predict_captcha('test_captcha.png')
print(recognized_text)

posted @ 2025-05-11 20:37  tmcor  阅读(66)  评论(0)    收藏  举报