爬虫 数字识别 手写
数字识别
# The sample images use the .bmp file suffix.
import matplotlib.pyplot as plt
import numpy as np
from sklearn.neighbors import KNeighborsClassifier

%matplotlib inline
实验 举例 样式
# Load a single sample bitmap, display it, and inspect its dimensions.
sample_path = './data/3/3_33.bmp'
img = plt.imread(sample_path)
plt.imshow(img)
img.shape

# Sample (feature) data and the matching target labels.
# Files are laid out as ./data/<digit>/<digit>_<index>.bmp, e.g. ./data/3/3_33.bmp
feature = []
target = []

for digit in range(10):            # the ten digits 0-9
    for index in range(1, 501):    # 500 images per digit
        img_path = './data/{0}/{0}_{1}.bmp'.format(digit, index)
        feature.append(plt.imread(img_path))
        target.append(digit)
数据
# Turn the collected Python lists into NumPy arrays.
feature = np.array(feature)
target = np.array(target)

feature.shape  # the features are 3-D here: one 2-D image per sample

# Flatten every image into one row so the feature matrix becomes 2-D.
# The row count comes from the array itself and -1 lets NumPy infer the
# per-image length, instead of hard-coding (5000, 784); for 5000 images of
# 28*28 pixels the result is unchanged.
feature = feature.reshape(feature.shape[0], -1)
feature.shape
# target.shape
将三维数据修改为二维数据

将样本打乱,为了测试数据更全面
# Shuffle samples and labels with the same seed. Re-seeding before each
# shuffle is what is meant to keep every image paired with its label
# (both arrays have the same length).
seed = 3
for data in (feature, target):
    np.random.seed(seed)
    np.random.shuffle(data)
获取训练数据和测试数据
# Split point: rows before it are used for training, the rest for testing.
split_at = 4950
x_train, x_test = feature[:split_at], feature[split_at:]
y_train, y_test = target[:split_at], target[split_at:]
实例化模型对象,训练
# Build a 15-neighbour KNN classifier and train it; fit() returns the
# estimator itself, so construction and training can be chained.
knn = KNeighborsClassifier(n_neighbors=15).fit(x_train, y_train)

# Score the trained model on the held-out test rows.
knn.score(x_test, y_test)
# Save the trained model to disk, then load it back.
# sklearn.externals.joblib was deprecated in scikit-learn 0.21 and removed in
# 0.23; the standalone joblib package (a scikit-learn dependency) replaces it.
try:
    import joblib
except ImportError:  # old scikit-learn releases that still bundle joblib
    from sklearn.externals import joblib

joblib.dump(knn, './digist_knn.m')

# NOTE: joblib.load unpickles the file; only load model files you trust.
knn = joblib.load('./digist_knn.m')
knn

# Check the model against the test data: print the known labels first,
# then the labels the model predicts for the same rows.
print('已知分类:', y_test)
predicted = knn.predict(x_test)
print('模型分类结果:', predicted)

# Feed an external picture to the model: load and display the full image.
img_arr = plt.imread('./数字.jpg')
plt.imshow(img_arr)

# Cut out the region of the picture that contains the digit and show it.
rows = slice(180, 230)
cols = slice(95, 125)
eight = img_arr[rows, cols]
plt.imshow(eight)


import scipy.ndimage as ndimage

# Dimensionality reduction: average over axis 2 so the crop becomes 2-D.
eight = eight.mean(axis=2)
eight.shape

# Proportionally rescale the crop to 28x28 pixels. The zoom factors are
# computed from the crop's actual shape rather than hard-coded as
# (28/50, 28/30), so changing the crop window no longer breaks the later
# reshape to 784 values; for the 50x30 crop the factors are identical.
target_side = 28
zoom_factors = (target_side / eight.shape[0], target_side / eight.shape[1])
eight = ndimage.zoom(eight, zoom=zoom_factors)
eight.shape
plt.imshow(eight)

# Lay the image out as a single row of 784 values, i.e. one sample.
# NOTE(review): presumably this must match the layout of the training rows
# (flattened 28*28 images) - confirm against the training cell.
eight = np.reshape(eight, (1, 784))
eight.shape
预测结果
# Ask the trained model which digit the prepared image shows.
digit = knn.predict(eight)
digit
……结果(一般情况下,预测结果与实际数字基本吻合)

遗憾的是,此次识别结果有偏差。

浙公网安备 33010602011771号