爬虫数字识别手写

数字识别

import numpy as np
# bmp 图片后缀
import matplotlib.pyplot as plt
%matplotlib inline

from sklearn.neighbors import KNeighborsClassifier

实验举例样式

# Load one sample bitmap, display it, and report the shape of its pixel array.
sample_path = './data/3/3_33.bmp'
img = plt.imread(sample_path)
plt.imshow(img)
img.shape

# Sample (feature) data: one image array per entry.
feature = []
# Target data: the digit label for the entry at the same index in `feature`.
target = []
# Example path: ./data/3/3_33.bmp

# Ten digits, 0-9.
for i in range(0, 10):
    # The files for each digit are numbered 1..500.
    for j in range(1, 501):
        # Layout: ./data/<digit>/<digit>_<index>.bmp
        img_path = './data/{0}/{0}_{1}.bmp'.format(i, j)
        img_arr = plt.imread(img_path)
        feature.append(img_arr)
        target.append(i)

数据

# Convert the accumulated Python lists into NumPy arrays.
feature = np.array(feature)
target = np.array(target)


feature.shape  # the features are 3-D at this point
# Flatten every image into one row so the data becomes 2-D
# (n_samples, n_pixels). The sample count is taken from the data and the
# pixel count is inferred, so 5000 images of 28*28 pixels give (5000, 784).
feature = feature.reshape(len(feature), -1)
feature.shape
# target.shape

将三维数据修改为二维数据

将样本打乱，为了测试数据更全面

# Shuffle samples and labels with ONE shared permutation, so every row of
# `feature` is guaranteed to stay paired with its label in `target`
# (instead of relying on two separately seeded shuffles lining up).
np.random.seed(3)  # fixed seed keeps the shuffle reproducible
shuffled_idx = np.random.permutation(len(feature))
feature = feature[shuffled_idx]
target = target[shuffled_idx]

获取训练数据和测试数据

# Train on the first 4950 shuffled samples; hold out everything from
# index 4950 onward for testing.
x_train, x_test = feature[:4950], feature[4950:]
y_train, y_test = target[:4950], target[4950:]

实例化模型对象，训练

# Build a 15-nearest-neighbours classifier and fit it on the training split
# (fit() returns the estimator itself, so the call can be chained).
knn = KNeighborsClassifier(n_neighbors=15).fit(x_train, y_train)
# Score the fitted model on the held-out samples.
knn.score(x_test, y_test)

# Save the trained model to disk.
# `sklearn.externals.joblib` was deprecated in scikit-learn 0.21 and removed
# in 0.23; prefer the standalone `joblib` package (a scikit-learn dependency)
# and fall back to the old location only on legacy installations.
try:
    import joblib
except ImportError:
    from sklearn.externals import joblib
joblib.dump(knn, './digist_knn.m')

# Reload the model from the file just written.
# NOTE: joblib files are pickle-based -- only load files you trust.
knn = joblib.load('./digist_knn.m')
knn

# Check the model against the test split: print the known labels, then the
# labels the model predicts for the same samples.
print('已知分类：', y_test)
predicted = knn.predict(x_test)
print('模型分类结果：', predicted)

# Feed an external picture to the model: first load and display it.
img_arr = plt.imread('./数字.jpg')
plt.imshow(img_arr)

# Cut out the region of interest (rows 180-229, columns 95-124) and show it.
row_span, col_span = slice(180, 230), slice(95, 125)
eight = img_arr[row_span, col_span]
plt.imshow(eight)

# Reduce dimensionality: average over the third axis (presumably the colour
# channels -- confirm for the input image) to get a 2-D array.
eight = eight.mean(axis=2)
eight.shape

# Rescale the pixels to 28 x 28. The zoom factors are derived from the
# current shape, so changing the crop bounds does not require editing them.
import scipy.ndimage as ndimage
target_side = 28
eight = ndimage.zoom(
    eight,
    zoom=(target_side / eight.shape[0], target_side / eight.shape[1]),
)
eight.shape

# Show the resized image.
plt.imshow(eight)

# Flatten it into a single-row sample of 784 (28 * 28) values.
eight = np.reshape(eight, (1, 28 * 28))
eight.shape

预测结果

# Ask the model to predict the label for the sample held in `eight`.
knn.predict(eight)

……结果（一般情况下与实际数字基本吻合）

遗憾的是，此次识别结果有误。

posted @ 2019-08-14 18:05 我的IT007 阅读(185) 评论(0) 收藏举报

刷新页面返回顶部

我的IT007

爬虫 数字识别 手写

公告

爬虫数字识别手写