# Machine Learning Task 6 (机器学习任务6)
# 导入必要的库
import pandas as pd
from sklearn import datasets
from sklearn.model_selection import train_test_split, cross_val_score, cross_validate
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score
# Load the iris dataset: feature matrix X and class labels y.
iris = datasets.load_iris()
X = iris.data
y = iris.target

# Hold-out split: keep about one third of the samples as the test set.
# stratify=y keeps the class proportions in both splits, and the fixed
# random_state makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.33, random_state=42, stratify=y
)
print(f"训练集样本数: {len(y_train)}, 测试集样本数: {len(y_test)}")

# Gaussian naive Bayes classifier.
gnb = GaussianNB()

# Five-fold cross-validation on the training set only. One cross_validate
# call evaluates all four metrics on the same fitted fold models, instead of
# re-running the full cross-validation once per metric.
cv_results = cross_validate(
    gnb,
    X_train,
    y_train,
    cv=5,
    scoring=["accuracy", "precision_macro", "recall_macro", "f1_macro"],
)
# Per-fold score arrays; result keys are "test_<scorer name>".
accuracy = cv_results["test_accuracy"]
precision = cv_results["test_precision_macro"]
recall = cv_results["test_recall_macro"]
f1 = cv_results["test_f1_macro"]

# Report the cross-validation results as mean ± standard deviation over folds.
print(f"五折交叉验证准确度: {accuracy.mean():.2f} ± {accuracy.std():.2f}")
print(f"精度: {precision.mean():.2f} ± {precision.std():.2f}")
print(f"召回率: {recall.mean():.2f} ± {recall.std():.2f}")
print(f"F1 值: {f1.mean():.2f} ± {f1.std():.2f}")

# Train the final model on the whole training set. Cross-validation works on
# clones of the estimator, so gnb itself must be fitted before predicting.
gnb.fit(X_train, y_train)

# Evaluate the fitted model on the held-out test set (macro-averaged metrics).
y_pred = gnb.predict(X_test)
test_accuracy = accuracy_score(y_test, y_pred)
test_precision = precision_score(y_test, y_pred, average='macro')
test_recall = recall_score(y_test, y_pred, average='macro')
test_f1 = f1_score(y_test, y_pred, average='macro')

# Report the test-set results.
print(f"\n测试集准确度: {test_accuracy:.2f}")
print(f"测试集精度: {test_precision:.2f}")
print(f"测试集召回率: {test_recall:.2f}")
print(f"测试集 F1 值: {test_f1:.2f}")

# 浙公网安备 33010602011771号