# 机器学习任务6

# 导入必要的库

import pandas as pd

from sklearn import datasets
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score
from sklearn.model_selection import train_test_split, cross_val_score, cross_validate
from sklearn.naive_bayes import GaussianNB

 

# Load the iris dataset.
iris = datasets.load_iris()
X = iris.data  # features
y = iris.target  # labels

# Hold-out split: reserve about one third of the samples as the test set.
# The split is seeded (random_state=42) and stratified on the labels.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.33, random_state=42, stratify=y
)

print(f"训练集样本数: {len(y_train)}, 测试集样本数: {len(y_test)}")

# Create the Gaussian naive Bayes classifier.
gnb = GaussianNB()

# Evaluate with five-fold cross-validation on the training set only.
# A single cross_validate call scores all four metrics on the same five
# fits, instead of re-running the whole cross-validation once per metric.
cv_results = cross_validate(
    gnb,
    X_train,
    y_train,
    cv=5,
    scoring=["accuracy", "precision_macro", "recall_macro", "f1_macro"],
)
accuracy = cv_results["test_accuracy"]
precision = cv_results["test_precision_macro"]
recall = cv_results["test_recall_macro"]
f1 = cv_results["test_f1_macro"]

# Report the cross-validation results as mean ± standard deviation over folds.
print(f"五折交叉验证准确度: {accuracy.mean():.2f} ± {accuracy.std():.2f}")
print(f"精度: {precision.mean():.2f} ± {precision.std():.2f}")
print(f"召回率: {recall.mean():.2f} ± {recall.std():.2f}")
print(f"F1 值: {f1.mean():.2f} ± {f1.std():.2f}")

# Fit the final model on the full training set.
gnb.fit(X_train, y_train)

# Predict on the held-out test set.
y_pred = gnb.predict(X_test)

# Test-set metrics; precision/recall/F1 are macro-averaged over the classes,
# matching the *_macro scorers used during cross-validation.
test_accuracy = accuracy_score(y_test, y_pred)
test_precision = precision_score(y_test, y_pred, average='macro')
test_recall = recall_score(y_test, y_pred, average='macro')
test_f1 = f1_score(y_test, y_pred, average='macro')

# Report the test-set results.
print(f"\n测试集准确度: {test_accuracy:.2f}")
print(f"测试集精度: {test_precision:.2f}")
print(f"测试集召回率: {test_recall:.2f}")
print(f"测试集 F1 值: {test_f1:.2f}")

# posted @ 2024-11-01 10:53  芊羽鱼  阅读(14)  评论(0)    收藏  举报