数据分析之KNN手写数字识别

import numpy as np
# bmp 图片后缀
import matplotlib.pyplot as plt
%matplotlib inline

from sklearn.neighbors import KNeighborsClassifier

  提炼样本数据

# Read one image file from the data directory and display it as a quick
# sanity check of what a sample looks like.
sample_path = './data/3/3_100.bmp'
img_arr = plt.imread(sample_path)
plt.imshow(img_arr)

  读出所有的数据

# Build the raw sample set: one image array per file, labelled with the digit
# used in its directory and file name.
feature = []
target = []
for digit in range(10):
    # Files are addressed as ./data/<digit>/<digit>_<n>.bmp.
    # Fix: the loop index already runs over 1..500, so it is used directly.
    # The original appended index + 1, which skipped file 1 and requested
    # file 501 (presumably absent, given 500 samples per digit).
    for index in range(1, 501):
        img_path = './data/{0}/{0}_{1}.bmp'.format(digit, index)
        img_arr = plt.imread(img_path)
        feature.append(img_arr)
        target.append(digit)

  样本数据的提取

# Convert the collected Python lists into NumPy arrays.
# Fix: the original read the misspelled name `featrue`, which raised NameError.
feature = np.array(feature)
target = np.array(target)
feature.shape

target.shape
# feature is a 3-D array (one 2-D image per sample); flatten every image into
# a single 28*28 row. The row count is taken from the array itself instead of
# the hard-coded 5000, so the reshape still works if the sample count changes.
feature = feature.reshape(feature.shape[0], 28 * 28)

feature.shape

  将样本数据打乱

# Shuffle features and labels in place. The generator is re-seeded with the
# same value before each shuffle so both arrays are shuffled from the same
# random state and rows stay aligned with their labels.
shuffle_seed = 3
for samples in (feature, target):
    np.random.seed(shuffle_seed)
    np.random.shuffle(samples)

  获取训练数据和测试数据

# Train on the first 4950 shuffled samples and keep the last 50 for testing.
train_count = 4950
test_count = 50
x_train, y_train = feature[:train_count], target[:train_count]
x_test, y_test = feature[-test_count:], target[-test_count:]

  实例化模型对象,训练

# Create a k-nearest-neighbours classifier that votes over 30 neighbours and
# fit it on the training split.
knn = KNeighborsClassifier(n_neighbors=30)
knn.fit(x_train, y_train)
# Fix: report accuracy on the held-out test split. The original scored the
# model on the same data it was fitted on, which does not measure how well it
# generalises and left the test split unused for evaluation.
knn.score(x_test, y_test)

  

# Print the predicted labels for the test split next to the true labels.
predicted = knn.predict(x_test)
print('预测分类:', predicted)
print('真实数据:', y_test)

  模型的保存

# Persist the fitted classifier to disk.
# `sklearn.externals.joblib` has been removed from current scikit-learn
# releases; joblib is a scikit-learn dependency, so import it directly and
# fall back to the old location only on legacy installs.
try:
    import joblib
except ImportError:
    from sklearn.externals import joblib

# Fix: the original call was missing its closing parenthesis (SyntaxError).
joblib.dump(knn, "./knn.m")

  读取模型

# Restore the classifier from the file written by joblib.dump.
model_path = "./knn.m"
knn = joblib.load(model_path)

  让模型进行外部图片的识别

# Read the external picture that the model will be asked to recognise and
# display it.
external_path = './数字.jpg'
img_arr = plt.imread(external_path)
plt.imshow(img_arr)

  利用切片取值

# Cut the region of interest out of the picture: rows 95:150, columns 85:1305.
# NOTE(review): the column range is far wider than the row range - confirm it
# really frames a single digit in this picture.
five_arr = img_arr[95:150, 85:1305]
# Fix: show the crop that was just taken; the original passed the undefined
# name `new_arr`, which raised NameError.
plt.imshow(five_arr)

 

# The crop is 3-D; average over the third (colour) axis so that only a 2-D
# image remains.
five_arr = np.mean(five_arr, axis=2)
five_arr.shape

 

import scipy.ndimage as ndimage

# Rescale the 2-D crop to 28x28 so it can be flattened into the 784-value row
# the classifier expects, then predict its digit.
# Fix: derive the zoom factors from the crop's actual shape. The original
# hard-coded 28/65 and 28/55, which do not match the crop (at most 55 rows),
# so the zoomed array was not 28x28 and reshape(1, 784) failed.
crop_rows, crop_cols = five_arr.shape
five = ndimage.zoom(five_arr, zoom=(28 / crop_rows, 28 / crop_cols))
knn.predict(five.reshape(1, 784))

 

posted @ 2019-03-11 16:25  Montant  阅读(332)  评论(0)  编辑  收藏  举报