# print(ord('0'),ord('A'),ord('a')) # ASCII codes: '0' -> 48, 'A' -> 65, 'a' -> 97
# print(10+26+26) # 62 dimensions per character
import numpy as np
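# Memory layout of the 62-dimensional one-hot vector for a single character:
# 10 digits (0-9) + 26 uppercase letters (A-Z) + 26 lowercase letters (a-z)
# [0,0,0,0,0,0....,0,0]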
def char2pos(c: str):
    """
    Map one alphanumeric character to its slot in the 62-wide one-hot layout.

    Slots 0-9 hold the digits '0'-'9', slots 10-35 hold 'A'-'Z' and
    slots 36-61 hold 'a'-'z'.

    :param c: a single ASCII digit or letter
    :return: integer slot in the range 0..61
    :raises ValueError: if ``c`` is not one of 0-9, A-Z, a-z
    :raises TypeError: if ``c`` is not a single character (raised by ``ord``)
    """
    c_idx = ord(c)
    # Subtract the ASCII offset of the character class, then add the number
    # of slots occupied by the classes that precede it in the layout.
    if 48 <= c_idx <= 57:
        return c_idx - 48  # 0-9
    if 65 <= c_idx <= 90:
        return c_idx - 65 + 10  # A-Z
    if 97 <= c_idx <= 122:
        return c_idx - 97 + 10 + 26  # a-z
    # Without this check, other characters would produce a negative or
    # colliding slot (':' would land on the slot of 'A', '!' on -15).
    raise ValueError(
        'char2pos expects one of 0-9, A-Z, a-z, got {!r}'.format(c)
    )
# print(char2pos('a'))
def text2vec(content: str):
    """
    Encode a string as a flat one-hot feature vector.

    The vector has 62 * 6 entries: one 62-wide segment per character
    position. For every character, the entry at
    ``position * 62 + char2pos(character)`` is set to 1.

    :param content: the text to encode
    :return: 1-D numpy array of length 62 * 6 holding zeros and ones
    """
    one_hot = np.zeros((62 * 6,))
    segment_start = 0
    for ch in content:
        one_hot[segment_start + char2pos(ch)] = 1
        # Advance to the 62-wide segment of the next character.
        segment_start += 62
    return one_hot
import os
import matplotlib.pyplot as plt
def load_img(img_dir='./img'):
    """
    Load every captcha image found in ``img_dir`` as training data.

    Each file is read with ``plt.imread`` and flattened into one row of the
    feature matrix. The file name up to its first '.' is taken as the captcha
    text and encoded with ``text2vec`` into the matching row of the target
    matrix.

    :param img_dir: directory holding the images (defaults to './img')
    :return: tuple ``(train_data, target_data)`` with shapes
             ``(n_files, 50 * 240)`` and ``(n_files, 62 * 6)``
    """
    img_names = os.listdir(img_dir)
    # Size the arrays from the directory listing instead of a fixed 32, so
    # extra files no longer raise IndexError and missing ones no longer
    # leave all-zero rows behind.
    n_samples = len(img_names)
    train_data = np.zeros((n_samples, 50 * 240))
    target_data = np.zeros((n_samples, 62 * 6))  # 6 characters per captcha
    for idx, name_str in enumerate(img_names):
        img = plt.imread(os.path.join(img_dir, name_str))
        # The captcha text is the file name without its extension.
        img_name = name_str.split('.')[0]
        # NOTE(review): assumes every image flattens to exactly 50 * 240
        # values (single-channel 50x240) — confirm against the data set.
        train_data[idx] = img.reshape(-1)
        target_data[idx] = text2vec(img_name)
    return train_data, target_data
def vec2text(v: np.array):
    """
    Decode a feature vector back into text.

    Every non-zero entry of ``v`` contributes one character, in index order.
    The entry's index modulo 62 selects the character: 0-9 are digits,
    10-35 are 'A'-'Z' and 36-61 are 'a'-'z'.

    :param v: numpy array whose non-zero entries mark the characters
    :return: the decoded string
    """
    decoded = []
    for flat_pos in v.nonzero()[0]:
        slot = flat_pos % 62
        if slot >= 36:
            # lowercase letter
            code_point = slot - 10 - 26 + ord('a')
        elif slot >= 10:
            # uppercase letter
            code_point = slot - 10 + ord('A')
        else:
            # digit
            code_point = slot + ord('0')
        decoded.append(chr(code_point))
    return ''.join(decoded)
# print(vec2text(text2vec('8uf2wj')))
from keras.models import Sequential,load_model
from keras.layers import Conv2D,MaxPool2D,Flatten,Dense
import tensorflow as tf
from sklearn.metrics import r2_score
# Load the captcha images and shape them for the network:
# inputs become (n_samples, 50, 240, 1) single-channel float32 tensors,
# targets stay (n_samples, 62 * 6) float32 multi-hot vectors.
train_data,target_data = load_img()
train_data = train_data.reshape(-1,50,240,1).astype(np.float32)
target_data = target_data.astype(np.float32)

# Two conv + max-pool stages followed by a dense head.
model = Sequential()
model.add(Conv2D(16,(3,3),input_shape=(50,240,1),activation=tf.nn.relu,padding='SAME')) # 1 = single channel
model.add(MaxPool2D(pool_size=(2,2)))
model.add(Conv2D(8,(3,3),padding='SAME',activation=tf.nn.relu))
model.add(MaxPool2D(pool_size=(2,2)))
model.add(Flatten())
model.add(Dense(512,activation=tf.nn.relu))
# Each target row is multi-hot (one 1 per character, six per captcha), so the
# 372 outputs are independent sigmoids trained with binary cross-entropy.
# A single softmax over all 372 outputs sums to 1 and cannot match a target
# that sums to 6.
model.add(Dense(372,activation=tf.nn.sigmoid))
model.compile(loss='binary_crossentropy',optimizer='Adam',metrics=['binary_accuracy'])
model.fit(train_data,target_data,epochs=1000)
# NOTE(review): this score is computed on the training data itself, so it
# measures fit, not generalisation.
print('r2_score is ::::',r2_score(target_data,model.predict(train_data)))