from sklearn.datasets import load_iris
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
def knn_demo():
    """Classify the iris dataset with a k-nearest-neighbours model.

    Loads the iris data bundled with scikit-learn, splits it into training
    and test sets (``random_state=6``), standardizes the features, fits a
    3-neighbour classifier, and prints the predictions, an element-wise
    comparison against the true labels, and the accuracy on the test set.

    :return: None
    """
    # Load the iris data that ships with scikit-learn and split it.
    dataset = load_iris()
    split = train_test_split(dataset.data, dataset.target, random_state=6)
    train_features, test_features, train_labels, test_labels = split

    # Standardize the features. The scaler is fitted on the training split
    # only; the test split is transformed with those same fitted parameters
    # so both splits are scaled identically.
    scaler = StandardScaler()
    train_features = scaler.fit_transform(train_features)
    test_features = scaler.transform(test_features)

    # Train the KNN classifier.
    classifier = KNeighborsClassifier(n_neighbors=3)
    classifier.fit(train_features, train_labels)

    # Predict the test split and compare with the true labels.
    predicted = classifier.predict(test_features)
    print("y_predict:", predicted)
    print("对比预测值和测试值", predicted == test_labels)

    # Accuracy evaluated on the test split.
    accuracy = classifier.score(test_features, test_labels)
    print("准确率为:", accuracy)
# Script entry point: run the basic KNN demo only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    knn_demo()
# Cross-validation and grid search for KNN, to find the best hyperparameters.
def knn_gridsearch_demo():
    """Classify the iris dataset with KNN, tuned by grid search and cross-validation.

    Loads the iris data bundled with scikit-learn, splits it into training
    and test sets (``random_state=6``), standardizes the features, and
    searches ``n_neighbors`` over ``[1, 3, 5, 7, 9, 11]`` using
    ``GridSearchCV`` with ``cv=10``. Prints the predictions, an element-wise
    comparison against the true labels, the test-set accuracy, and the
    search results (best parameters, best score, best estimator and the
    full cross-validation results).

    :return: None
    """
    # Load the iris data that ships with scikit-learn and split it.
    iris = load_iris()
    x_train, x_test, y_train, y_test = train_test_split(
        iris.data, iris.target, random_state=6
    )

    # Standardize the features. The scaler must be fitted on the training
    # split only; the test split is transformed with those same parameters.
    # Calling fit_transform on the test split would re-estimate the mean and
    # variance from the test data and scale the two splits inconsistently.
    transfer = StandardScaler()
    x_train = transfer.fit_transform(x_train)
    x_test = transfer.transform(x_test)

    # KNN estimator wrapped in a grid search with cross-validation.
    param_dict = {"n_neighbors": [1, 3, 5, 7, 9, 11]}
    estimator = GridSearchCV(KNeighborsClassifier(), param_grid=param_dict, cv=10)
    estimator.fit(x_train, y_train)

    # Predict the test split and compare with the true labels.
    y_predict = estimator.predict(x_test)
    print("y_predict:", y_predict)
    print("对比预测值和测试值", y_predict == y_test)

    # Accuracy evaluated on the test split.
    score = estimator.score(x_test, y_test)
    print("准确率为:", score)

    # Results of the hyperparameter search.
    print("最佳参数:\n", estimator.best_params_)
    print("最佳结果:\n", estimator.best_score_)
    print("最佳估计器:\n", estimator.best_estimator_)
    print("交叉验证结果:\n", estimator.cv_results_)