# 12.26
#
# Machine Learning Experiment 6: Gaussian naive Bayes on iris with 5-fold CV
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import KFold
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.naive_bayes import GaussianNB

# 1. Load the dataset

def load_dataset():
    """Load the iris dataset and print a short summary of it.

    The summary (printed to stdout) contains the shape of the feature
    matrix, the feature names, the class names and the number of samples
    per class.

    Returns:
        A tuple ``(X, y, target_names)`` taken from the object returned by
        ``load_iris()``: its ``data``, ``target`` and ``target_names``
        attributes respectively.
    """
    iris = load_iris()
    X = iris.data  # feature data
    y = iris.target  # label data
    feature_names = iris.feature_names
    target_names = iris.target_names

    print("=== 数据集基本信息 ===")
    print(f"数据集大小: {X.shape}")
    print(f"特征名称: {feature_names}")
    print(f"标签名称: {target_names}")
    # bincount yields the number of samples for each integer label
    print(f"标签分布: {np.bincount(y)}")

    return X, y, target_names

# 2. Five-fold cross-validation function

def cross_validation(X, y, model, n_splits=5):
    """Evaluate ``model`` with shuffled K-fold cross-validation.

    For every fold, ``model.fit`` is called on the training rows,
    ``model.predict`` on the held-out rows, and the predictions are scored
    with accuracy and macro-averaged precision, recall and F1. Per-fold
    scores and their means are printed to stdout.

    Args:
        X: Feature data; must support indexing with an integer index array
            (``X[train_index]``).
        y: Label data, indexable the same way as ``X``.
        model: Classifier object exposing ``fit(X, y)`` and ``predict(X)``.
            The same instance is fitted again on each fold.
        n_splits: Number of cross-validation folds (default 5).

    Returns:
        dict with keys ``'accuracy'``, ``'precision'``, ``'recall'`` and
        ``'f1'``; each value is the mean of that metric over all folds.
    """
    # Fixed random_state keeps the shuffled split reproducible between runs.
    kf = KFold(n_splits=n_splits, shuffle=True, random_state=42)

    # Per-fold scores, one entry appended per fold.
    accuracies = []
    precisions = []
    recalls = []
    f1s = []

    for fold, (train_index, test_index) in enumerate(kf.split(X), start=1):
        print(f"\n=== 第 {fold} 折 ===")

        # Split into the training part and the held-out part of this fold.
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]

        print(f"训练集大小: {X_train.shape[0]}, 测试集大小: {X_test.shape[0]}")

        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)

        # 'macro' averages the per-class scores with equal weight per class.
        accuracy = accuracy_score(y_test, y_pred)
        precision = precision_score(y_test, y_pred, average='macro')
        recall = recall_score(y_test, y_pred, average='macro')
        f1 = f1_score(y_test, y_pred, average='macro')

        accuracies.append(accuracy)
        precisions.append(precision)
        recalls.append(recall)
        f1s.append(f1)

        print(f"准确率: {accuracy:.4f}")
        print(f"精度: {precision:.4f}")
        print(f"召回率: {recall:.4f}")
        print(f"F1值: {f1:.4f}")

    avg_accuracy = np.mean(accuracies)
    avg_precision = np.mean(precisions)
    avg_recall = np.mean(recalls)
    avg_f1 = np.mean(f1s)

    # NOTE: the heading text is fixed and does not reflect n_splits.
    print("\n=== 五折交叉验证平均结果 ===")
    print(f"平均准确率: {avg_accuracy:.4f}")
    print(f"平均精度: {avg_precision:.4f}")
    print(f"平均召回率: {avg_recall:.4f}")
    print(f"平均F1值: {avg_f1:.4f}")

    return {
        'accuracy': avg_accuracy,
        'precision': avg_precision,
        'recall': avg_recall,
        'f1': avg_f1,
    }

# 3. Main function

def main():
    """Run the Gaussian naive Bayes cross-validation demo end to end.

    Loads the dataset via ``load_dataset``, prints a short description of
    the ``GaussianNB`` parameters, evaluates a default ``GaussianNB`` with
    ``cross_validation`` and prints a closing summary that includes the
    mean accuracy. Returns nothing; all output goes to stdout.
    """
    print("朴素贝叶斯算法实现与测试")
    print("=" * 50)

    # The class names returned by load_dataset are not needed here.
    X, y, _ = load_dataset()

    print("\n=== 使用scikit-learn的GaussianNB模型 ===")
    print("GaussianNB参数说明:")
    print("- priors: 类的先验概率数组,如果指定则不根据数据调整")
    print("- var_smoothing: 所有特征的最大方差的一部分,")
    print("  用于添加到方差中以提高计算稳定性")
    print("  默认值: 1e-9")

    # Gaussian naive Bayes with its default parameters.
    model = GaussianNB()

    results = cross_validation(X, y, model)

    # NOTE: the summary wording is fixed; only the accuracy figure is computed.
    print("\n=== 模型性能分析 ===")
    print("朴素贝叶斯分类器在iris数据集上表现优异,")
    print(f"平均准确率达到{results['accuracy']:.4f},说明模型能够正确分类大部分样本。")
    print("各项指标(精度、召回率、F1值)也都非常高,")
    print("表明模型在不同类别上都有良好的表现,没有明显的偏向性。")

# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()

# posted @ 2025-12-26 22:18  山蚯  阅读(2)  评论(0)    收藏  举报