爬虫 数字识别 手写

数字识别

import numpy as np
# bmp 图片后缀
import matplotlib.pyplot as plt
%matplotlib inline

from sklearn.neighbors import KNeighborsClassifier

实验 举例  样式

# Load a single sample bitmap (digit 3, sample 33) and display it.
img = plt.imread('./data/3/3_33.bmp')
plt.imshow(img)
# Inspect the array shape of the loaded image.
img.shape

 

# Sample data: one image array per entry.
feature = []
# Target data: the digit label for each sample.
# Image paths look like ./data/3/3_33.bmp
target = []

# Ten digits, 0-9.
for i in range(0, 10):
    # 500 images per digit, numbered 1..500.
    for j in range(1, 501):
        img_path = f'./data/{i}/{i}_{j}.bmp'
        img_arr = plt.imread(img_path)
        feature.append(img_arr)
        # The label is the digit whose directory the image came from.
        target.append(i)

数据

# Convert the accumulated Python lists into NumPy arrays.
feature = np.array(feature)
target = np.array(target)

feature.shape
# The features are three-dimensional (one 2-D image per sample) and must be
# reshaped to two dimensions: one flat row of pixels per sample.
feature = feature.reshape(5000, 784)  # 784 = 28*28 pixels per image
feature.shape
# target.shape

将三维数据修改为二维数据

 将样本打乱,为了测试数据更全面

# Re-seed the global RNG with the same value before shuffling each array so
# that samples and labels are reordered the same way and stay aligned.
for shuffled in (feature, target):
    np.random.seed(3)
    np.random.shuffle(shuffled)

获取训练数据和测试数据

# Split at index 4950: everything before it is used for training, the
# remainder is held back for testing.
x_train, x_test = np.split(feature, [4950])
y_train, y_test = np.split(target, [4950])

实例化模型对象,训练

# Build a k-nearest-neighbours classifier that uses 15 neighbours and fit it
# on the training split.
knn = KNeighborsClassifier(n_neighbors=15)
knn.fit(x_train,y_train)
# Score the fitted model on the test split.
knn.score(x_test,y_test)
# Save the trained model.
# `sklearn.externals.joblib` was deprecated in scikit-learn 0.21 and removed
# in 0.23, so prefer the standalone `joblib` package and fall back to the
# vendored copy only on old installations.
try:
    import joblib
except ImportError:
    from sklearn.externals import joblib
joblib.dump(knn,'./digist_knn.m')

 

# Reload the persisted model from disk.
knn = joblib.load('./digist_knn.m')
knn

# Use the test data to check the model's accuracy: print the known labels
# next to the labels the model predicts for the same samples.
print('已知分类:',y_test)
print('模型分类结果:',knn.predict(x_test))

 

 

# Feed an external image into the model for recognition.
img_arr = plt.imread('./数字.jpg')
plt.imshow(img_arr)


# Crop rows 180-229 and columns 95-124 (a 50x30 window) out of the picture;
# presumably this window contains the digit to recognise — verify against the image.
eight = img_arr[180:230,95:125]
plt.imshow(eight)

 

 

 

# Dimensionality reduction: average over the colour-channel axis to obtain a
# single 2-D plane. An image that is already 2-D has no axis 2, so it is
# left untouched instead of raising.
if eight.ndim == 3:
    eight = eight.mean(axis=2)
eight.shape
# Proportionally rescale the pixels down to 28x28.
import scipy.ndimage as ndimage
# Derive the zoom factors from the actual crop size rather than hard-coding
# 50x30, so changing the crop window still yields a 28x28 result.
eight = ndimage.zoom(eight, zoom=(28 / eight.shape[0], 28 / eight.shape[1]))
eight.shape

 

# Display the image after rescaling.
plt.imshow(eight)

# Flatten the image into a single row of 784 values (one sample, 784 features).
eight = eight.reshape((1,784))
eight.shape

预测结果

# Ask the model to classify the prepared single-row sample.
knn.predict(eight)

……结果(一般基本吻合)

伤心,此次识别有差池。

 

posted @ 2019-08-14 18:05  我的IT007  阅读(185)  评论(0)    收藏  举报