import numpy as np
from sklearn import datasets
# Load the raw digits data
digits = datasets.load_digits()
X = digits.data
y = digits.target
# Split the raw data with the hand-written train_test_split function
from ALG.train_test_split import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_ratio=0.2, seed=666)
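# The ALG.train_test_split import above refers to the hand-written splitter from an earlier
# section, whose source is not reproduced here. As a reference only, a minimal sketch of such a
# shuffle-based split might look like the function below (its name and parameters are
# assumptions for illustration, not the actual ALG code):
def _train_test_split_sketch(X, y, test_ratio=0.2, seed=None):
    """Shuffle the sample indices and carve off the first test_ratio fraction as the test set."""
    if seed is not None:
        np.random.seed(seed)
    shuffled_indexes = np.random.permutation(len(X))
    test_size = int(len(X) * test_ratio)
    test_indexes = shuffled_indexes[:test_size]
    train_indexes = shuffled_indexes[test_size:]
    return X[train_indexes], X[test_indexes], y[train_indexes], y[test_indexes]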
# 1) Define the parameter grid to search over
param_grid = [
    {
        'weights': ['uniform'],
        'n_neighbors': [i for i in range(1, 11)]
    },
    {
        'weights': ['distance'],
        'n_neighbors': [i for i in range(1, 11)],
        'p': [i for i in range(1, 6)]
    }
]
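# GridSearchCV treats a list of dicts as a union of independent grids: the first dict above
# contributes 1 * 10 = 10 candidates and the second 1 * 10 * 5 = 50, i.e. 60 parameter
# combinations in total. As a quick sanity check, the candidates can be enumerated with
# ParameterGrid:
from sklearn.model_selection import ParameterGrid
len(ParameterGrid(param_grid))   # 60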
# 2) Create the machine learning estimator whose hyperparameters will be grid-searched
from sklearn.neighbors import KNeighborsClassifier
knn_clf = KNeighborsClassifier()
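# Hyperparameters that are not listed in param_grid keep the defaults of the estimator passed
# in; those defaults can be inspected with get_params():
knn_clf.get_params()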
# 3) Instantiate scikit-learn's grid search object
from sklearn.model_selection import GridSearchCV
# Create the GridSearchCV instance; four arguments are passed here: the estimator to tune,
# the parameter grid, n_jobs=-1 (use all CPU cores in parallel) and verbose=2 (print
# progress messages while the search runs)
grid_search = GridSearchCV(knn_clf, param_grid, n_jobs=-1, verbose=2)
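# The number of folds and the metric can also be set explicitly. In recent scikit-learn
# versions the defaults are cv=5 and, for classifiers, accuracy, so the object below
# (shown for illustration only and not fitted here) is configured equivalently:
grid_search_explicit = GridSearchCV(knn_clf, param_grid, cv=5, scoring='accuracy',
                                    n_jobs=-1, verbose=2)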
# 4) Fit the grid search instance on the training data
# (%%time is a Jupyter/IPython cell magic that reports the wall-clock time of the whole cell;
#  it must be the first line of its own cell)
%%time
grid_search.fit(X_train, y_train)
# 5) Inspect the results
# The best estimator found by the search, shown with all of its parameters
grid_search.best_estimator_
# Inspect the accuracy
# best_score_ is the mean cross-validated accuracy of the best candidate. This value
# (0.9853963838664812) is lower than the accuracy obtained earlier with n_neighbors = 3,
# because the evaluation criterion has changed: it averages scores over the cross-validation
# folds instead of measuring accuracy on a single held-out test set
grid_search.best_score_
# The best combination among the search parameters defined in param_grid
grid_search.best_params_
# Returns: {'n_neighbors': 3, 'p': 3, 'weights': 'distance'}
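# Beyond the single best result, the full table of candidate scores is stored in
# grid_search.cv_results_; a quick way to inspect it (assuming pandas is installed):
import pandas as pd
results = pd.DataFrame(grid_search.cv_results_)
results[['params', 'mean_test_score', 'rank_test_score']].sort_values('rank_test_score').head()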
# Retrieve the best classifier (with refit=True, the default, it has already been refit on the
# whole training set)
knn_clf = grid_search.best_estimator_
# 6) Use the best classifier for prediction and score it on the test set
knn_clf.score(X_test, y_test)
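# Equivalently, the test accuracy can be computed from explicit predictions; for a classifier,
# score() is the same as accuracy_score applied to the predictions:
from sklearn.metrics import accuracy_score
y_predict = knn_clf.predict(X_test)
accuracy_score(y_test, y_predict)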