# 12.26
# 机器学习实验6
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import KFold
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.naive_bayes import GaussianNB
# 1. 加载数据集
def load_dataset():
    """Load the iris dataset and print a short summary of it.

    Returns:
        A ``(X, y, target_names)`` tuple holding the feature data, the
        label data and the label names of the loaded dataset.
    """
    dataset = load_iris()
    features, labels = dataset.data, dataset.target
    column_names = dataset.feature_names
    class_names = dataset.target_names

    # Summary: shape, feature names, label names, samples per label.
    print("=== 数据集基本信息 ===")
    print(f"数据集大小: {features.shape}")
    print(f"特征名称: {column_names}")
    print(f"标签名称: {class_names}")
    print(f"标签分布: {np.bincount(labels)}")

    return features, labels, class_names
# 2. 五折交叉验证函数
def cross_validation(X, y, model, n_splits=5):
    """Run shuffled K-fold cross-validation and report averaged metrics.

    For every fold the model is fitted on the training part, used to
    predict the held-out part, and scored with accuracy plus
    macro-averaged precision, recall and F1. Per-fold and averaged
    results are printed.

    Args:
        X: Feature data; any array-like accepted by ``np.asarray``.
        y: Label data; any array-like accepted by ``np.asarray``.
        model: Classifier exposing ``fit(X, y)`` and ``predict(X)``. The
            same instance is re-fitted on every fold.
        n_splits: Number of folds.

    Returns:
        dict with keys ``'accuracy'``, ``'precision'``, ``'recall'`` and
        ``'f1'``, each mapped to the mean of that metric over the folds.
    """
    # KFold yields positional indices. Coerce to arrays so that lists and
    # pandas objects are indexed by position rather than failing or being
    # looked up by label/column.
    X = np.asarray(X)
    y = np.asarray(y)

    # Fixed seed keeps the shuffled split reproducible between runs.
    kf = KFold(n_splits=n_splits, shuffle=True, random_state=42)

    # Per-fold scores, keyed like the returned summary.
    scores = {'accuracy': [], 'precision': [], 'recall': [], 'f1': []}

    for fold, (train_index, test_index) in enumerate(kf.split(X), start=1):
        print(f"\n=== 第 {fold} 折 ===")

        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]
        print(f"训练集大小: {X_train.shape[0]}, 测试集大小: {X_test.shape[0]}")

        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)

        accuracy = accuracy_score(y_test, y_pred)
        precision = precision_score(y_test, y_pred, average='macro')
        recall = recall_score(y_test, y_pred, average='macro')
        f1 = f1_score(y_test, y_pred, average='macro')

        scores['accuracy'].append(accuracy)
        scores['precision'].append(precision)
        scores['recall'].append(recall)
        scores['f1'].append(f1)

        print(f"准确率: {accuracy:.4f}")
        print(f"精度: {precision:.4f}")
        print(f"召回率: {recall:.4f}")
        print(f"F1值: {f1:.4f}")

    averages = {metric: np.mean(values) for metric, values in scores.items()}

    print("\n=== 五折交叉验证平均结果 ===")
    print(f"平均准确率: {averages['accuracy']:.4f}")
    print(f"平均精度: {averages['precision']:.4f}")
    print(f"平均召回率: {averages['recall']:.4f}")
    print(f"平均F1值: {averages['f1']:.4f}")

    return averages
# 3. 主函数
def main():
    """Run the Gaussian naive Bayes experiment from start to finish.

    Loads the dataset, prints a short description of the GaussianNB
    parameters, cross-validates a default GaussianNB model and prints a
    closing commentary that includes the averaged accuracy.
    """
    print("朴素贝叶斯算法实现与测试")
    print("=" * 50)

    X, y, _target_names = load_dataset()

    # Static explanation of the estimator's parameters.
    parameter_notes = (
        "\n=== 使用scikit-learn的GaussianNB模型 ===",
        "GaussianNB参数说明:",
        "- priors: 类的先验概率数组,如果指定则不根据数据调整",
        "- var_smoothing: 所有特征的最大方差的一部分,",
        " 用于添加到方差中以提高计算稳定性",
        " 默认值: 1e-9",
    )
    for note in parameter_notes:
        print(note)

    # Default-configured model, evaluated with the default fold count.
    results = cross_validation(X, y, GaussianNB())

    commentary = (
        "\n=== 模型性能分析 ===",
        "朴素贝叶斯分类器在iris数据集上表现优异,",
        f"平均准确率达到{results['accuracy']:.4f},说明模型能够正确分类大部分样本。",
        "各项指标(精度、召回率、F1值)也都非常高,",
        "表明模型在不同类别上都有良好的表现,没有明显的偏向性。",
    )
    for sentence in commentary:
        print(sentence)
# Run the experiment only when this file is executed as a script; the
# original `name == "main"` referenced an undefined variable.
if __name__ == "__main__":
    main()

# 浙公网安备 33010602011771号