2025.11.14上机实验五:BP 神经网络算法实现与测试

# BP神经网络算法实现与测试

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.model_selection import KFold
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.preprocessing import StandardScaler

# 设置中文显示

# Put the SimHei font first in the sans-serif list so the Chinese axis labels
# and titles used by the plots below can be rendered (requires SimHei to be
# installed on the machine).
plt.rcParams['font.sans-serif'] = ['SimHei']
# Turn off the Unicode minus glyph so negative tick labels fall back to the
# ASCII hyphen, which CJK fonts such as SimHei are able to draw.
plt.rcParams['axes.unicode_minus'] = False

# 1. 加载数据集并进行数据分析

def load_and_analyze_data():
    """Load the iris dataset, print a summary and save feature histograms.

    Prints the DataFrame shape, its first five rows, ``describe()`` statistics
    and the per-class sample counts. Then draws one histogram per feature
    (classes overlaid in red/green/blue) on a 2x2 grid and writes it to
    ``feature_distribution.png`` in the current working directory.

    Returns:
        tuple: ``(iris.data, iris.target, iris.target_names)`` exactly as
        provided by ``sklearn.datasets.load_iris``.
    """
    iris = load_iris()

    # Wrap the raw arrays in a DataFrame so pandas can do the summarising.
    df = pd.DataFrame(data=iris.data, columns=iris.feature_names)
    df['target'] = iris.target
    # Take the class names from the dataset itself instead of a hard-coded
    # dict, so the labels can never drift from ``iris.target_names``.
    df['target_name'] = df['target'].map(dict(enumerate(iris.target_names)))

    print("=== 数据集基本信息 ===")
    print(f"数据集大小: {df.shape}")
    print("\n数据集前5行:")
    print(df.head())

    print("\n=== 数据集统计信息 ===")
    print(df.describe())

    print("\n=== 各类别分布 ===")
    print(df['target_name'].value_counts())

    # One subplot per feature; ``axes.flat`` walks the 2x2 grid row by row,
    # which matches the original ``i // 2, i % 2`` indexing.
    fig, axes = plt.subplots(2, 2, figsize=(12, 10))
    class_colors = ('r', 'g', 'b')

    for ax, feature in zip(axes.flat, iris.feature_names):
        for target, color in enumerate(class_colors):
            ax.hist(
                df.loc[df['target'] == target, feature],
                bins=15,
                alpha=0.5,
                label=iris.target_names[target],
                color=color,
            )
        ax.set_title(feature)
        ax.set_xlabel(feature)
        ax.set_ylabel('频数')
        ax.legend()

    fig.tight_layout()
    fig.savefig('feature_distribution.png', dpi=300, bbox_inches='tight')
    # Release the figure once it is on disk; pyplot otherwise keeps it alive.
    plt.close(fig)
    print("\n特征分布直方图已保存为 'feature_distribution.png'")

    return iris.data, iris.target, iris.target_names

# 2. 实现五折交叉验证并训练模型

def train_with_cross_validation(X, y, target_names):
    """Train and score an MLP classifier with shuffled 5-fold cross-validation.

    For every fold the features are standardised with a ``StandardScaler``
    fitted on the training split only, a fresh ``MLPClassifier`` is trained,
    and accuracy plus weighted precision/recall/F1 on the held-out split are
    printed and collected.

    Args:
        X: Feature matrix, indexed by the integer arrays ``KFold.split`` yields.
        y: Label vector aligned with ``X``.
        target_names: Class names. Not used by this function; kept so the
            existing call signature stays valid.

    Returns:
        dict: Keys ``'accuracies'``, ``'precisions'``, ``'recalls'`` and
        ``'f1_scores'``, each mapping to a list with one float per fold.
    """
    # Hyper-parameters handed to MLPClassifier for every fold.
    mlp_params = {
        'hidden_layer_sizes': (10, 10),  # two hidden layers of 10 units
        'activation': 'relu',            # activation function
        'solver': 'adam',                # optimiser
        'alpha': 0.0001,                 # L2 regularisation strength
        'batch_size': 'auto',            # mini-batch size
        'learning_rate': 'constant',     # learning-rate schedule
        'learning_rate_init': 0.001,     # initial learning rate
        'max_iter': 200,                 # maximum number of iterations
        'shuffle': True,                 # shuffle samples each iteration
        'random_state': 42,              # seed for reproducibility
    }

    print("\n=== BP神经网络参数 ===")
    for param, value in mlp_params.items():
        print(f"{param}: {value}")

    kf = KFold(n_splits=5, shuffle=True, random_state=42)

    # Per-fold metric values, appended in fold order.
    accuracies = []
    precisions = []
    recalls = []
    f1_scores = []

    for fold, (train_index, test_index) in enumerate(kf.split(X), start=1):
        print(f"\n--- 第 {fold} 折交叉验证 ---")

        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]

        # Fit the scaler on the training split only so no statistics of the
        # held-out split leak into training.
        scaler = StandardScaler()
        X_train_scaled = scaler.fit_transform(X_train)
        X_test_scaled = scaler.transform(X_test)

        mlp = MLPClassifier(**mlp_params)
        mlp.fit(X_train_scaled, y_train)

        y_pred = mlp.predict(X_test_scaled)

        # zero_division=0 yields the same numbers as the default but without
        # an UndefinedMetricWarning when a class is absent from a fold.
        accuracy = accuracy_score(y_test, y_pred)
        precision = precision_score(
            y_test, y_pred, average='weighted', zero_division=0)
        recall = recall_score(
            y_test, y_pred, average='weighted', zero_division=0)
        f1 = f1_score(y_test, y_pred, average='weighted', zero_division=0)

        accuracies.append(accuracy)
        precisions.append(precision)
        recalls.append(recall)
        f1_scores.append(f1)

        print(f"准确率: {accuracy:.4f}")
        print(f"精确率: {precision:.4f}")
        print(f"召回率: {recall:.4f}")
        print(f"F1值: {f1:.4f}")

    return {
        'accuracies': accuracies,
        'precisions': precisions,
        'recalls': recalls,
        'f1_scores': f1_scores,
    }

# 3. 分析和可视化结果

def analyze_results(results):
    """Print mean/std of the cross-validation metrics and save two plots.

    Writes a per-fold line chart to ``cross_validation_results.png`` and a
    box plot of each metric to ``performance_boxplot.png`` in the current
    working directory.

    Args:
        results: Dict with keys ``'accuracies'``, ``'precisions'``,
            ``'recalls'`` and ``'f1_scores'``, each a sequence holding one
            score per fold.
    """
    metrics = ['准确率', '精确率', '召回率', 'F1值']
    results_list = [
        results['accuracies'],
        results['precisions'],
        results['recalls'],
        results['f1_scores'],
    ]

    print("\n=== 五折交叉验证平均性能 ===")
    for metric_name, metric_values in zip(metrics, results_list):
        print(f"平均{metric_name}: {np.mean(metric_values):.4f} "
              f"± {np.std(metric_values):.4f}")

    # Derive the x axis from the data rather than assuming exactly 5 folds.
    folds = range(1, len(results['accuracies']) + 1)

    # Keep the original 0.9 floor when every score fits, but lower it when a
    # fold scored below 0.9 so that point is not clipped out of the plot.
    lowest = min(
        (value for values in results_list for value in values), default=0.9)
    y_min = min(0.9, lowest - 0.02)

    # Line chart: one line per metric, one point per fold.
    line_fig = plt.figure(figsize=(12, 8))

    for metric_name, metric_values in zip(metrics, results_list):
        plt.plot(folds, metric_values, marker='o', linewidth=2,
                 label=metric_name)

        # Annotate every point with its value, slightly above the marker.
        for fold, value in zip(folds, metric_values):
            plt.text(fold, value + 0.005, f'{value:.4f}', ha='center')

    plt.title('五折交叉验证各指标性能对比', fontsize=14)
    plt.xlabel('折数', fontsize=12)
    plt.ylabel('性能值', fontsize=12)
    plt.ylim(y_min, 1.01)
    plt.xticks(folds)
    plt.grid(True, linestyle='--', alpha=0.7)
    plt.legend(fontsize=12)
    plt.tight_layout()
    plt.savefig('cross_validation_results.png', dpi=300, bbox_inches='tight')
    plt.close(line_fig)
    print("\n交叉验证结果图已保存为 'cross_validation_results.png'")

    # Box plot: distribution of each metric across the folds.
    box_fig = plt.figure(figsize=(10, 6))
    plt.boxplot(results_list)
    # Label the boxes through xticks: the ``labels`` keyword of boxplot was
    # renamed to ``tick_labels`` in Matplotlib 3.9, so setting the ticks
    # directly works on both older and newer versions. Boxes sit at 1..N.
    plt.xticks(range(1, len(metrics) + 1), metrics)
    plt.title('各性能指标分布箱线图', fontsize=14)
    plt.ylabel('性能值', fontsize=12)
    plt.ylim(y_min, 1.01)
    plt.grid(True, linestyle='--', alpha=0.7)
    plt.tight_layout()
    plt.savefig('performance_boxplot.png', dpi=300, bbox_inches='tight')
    plt.close(box_fig)
    print("各性能指标箱线图已保存为 'performance_boxplot.png'")

# 4. 主函数

def main():
    """Run the experiment: analyse the data, cross-validate, report results.

    Calls ``load_and_analyze_data``, ``train_with_cross_validation`` and
    ``analyze_results`` in order, then prints the list of generated figures.
    """
    print("=== 上机实验五:BP神经网络算法实现与测试 ===")
    print("=" * 50)

    # Load the dataset and produce the exploratory output.
    X, y, target_names = load_and_analyze_data()

    # Train and score the model with 5-fold cross-validation.
    results = train_with_cross_validation(X, y, target_names)

    # Summarise and plot the per-fold metrics.
    analyze_results(results)

    print("\n=== 实验完成 ===")
    print("实验报告所需图表已生成,包括:")
    print("1. 特征分布直方图 (feature_distribution.png)")
    print("2. 交叉验证结果对比图 (cross_validation_results.png)")
    print("3. 性能指标箱线图 (performance_boxplot.png)")

# Run the experiment only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()

# BP神经网络算法实现与测试

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.model_selection import KFold
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.preprocessing import StandardScaler

# 设置中文显示

# Put the SimHei font first in the sans-serif list so the Chinese axis labels
# and titles used by the plots below can be rendered (requires SimHei to be
# installed on the machine).
plt.rcParams['font.sans-serif'] = ['SimHei']
# Turn off the Unicode minus glyph so negative tick labels fall back to the
# ASCII hyphen, which CJK fonts such as SimHei are able to draw.
plt.rcParams['axes.unicode_minus'] = False

# 1. 加载数据集并进行数据分析

def load_and_analyze_data():
    """Load the iris dataset, print a summary and save feature histograms.

    Prints the DataFrame shape, its first five rows, ``describe()`` statistics
    and the per-class sample counts. Then draws one histogram per feature
    (classes overlaid in red/green/blue) on a 2x2 grid and writes it to
    ``feature_distribution.png`` in the current working directory.

    Returns:
        tuple: ``(iris.data, iris.target, iris.target_names)`` exactly as
        provided by ``sklearn.datasets.load_iris``.
    """
    iris = load_iris()

    # Wrap the raw arrays in a DataFrame so pandas can do the summarising.
    df = pd.DataFrame(data=iris.data, columns=iris.feature_names)
    df['target'] = iris.target
    # Take the class names from the dataset itself instead of a hard-coded
    # dict, so the labels can never drift from ``iris.target_names``.
    df['target_name'] = df['target'].map(dict(enumerate(iris.target_names)))

    print("=== 数据集基本信息 ===")
    print(f"数据集大小: {df.shape}")
    print("\n数据集前5行:")
    print(df.head())

    print("\n=== 数据集统计信息 ===")
    print(df.describe())

    print("\n=== 各类别分布 ===")
    print(df['target_name'].value_counts())

    # One subplot per feature; ``axes.flat`` walks the 2x2 grid row by row,
    # which matches the original ``i // 2, i % 2`` indexing.
    fig, axes = plt.subplots(2, 2, figsize=(12, 10))
    class_colors = ('r', 'g', 'b')

    for ax, feature in zip(axes.flat, iris.feature_names):
        for target, color in enumerate(class_colors):
            ax.hist(
                df.loc[df['target'] == target, feature],
                bins=15,
                alpha=0.5,
                label=iris.target_names[target],
                color=color,
            )
        ax.set_title(feature)
        ax.set_xlabel(feature)
        ax.set_ylabel('频数')
        ax.legend()

    fig.tight_layout()
    fig.savefig('feature_distribution.png', dpi=300, bbox_inches='tight')
    # Release the figure once it is on disk; pyplot otherwise keeps it alive.
    plt.close(fig)
    print("\n特征分布直方图已保存为 'feature_distribution.png'")

    return iris.data, iris.target, iris.target_names

# 2. 实现五折交叉验证并训练模型

def train_with_cross_validation(X, y, target_names):
    """Train and score an MLP classifier with shuffled 5-fold cross-validation.

    For every fold the features are standardised with a ``StandardScaler``
    fitted on the training split only, a fresh ``MLPClassifier`` is trained,
    and accuracy plus weighted precision/recall/F1 on the held-out split are
    printed and collected.

    Args:
        X: Feature matrix, indexed by the integer arrays ``KFold.split`` yields.
        y: Label vector aligned with ``X``.
        target_names: Class names. Not used by this function; kept so the
            existing call signature stays valid.

    Returns:
        dict: Keys ``'accuracies'``, ``'precisions'``, ``'recalls'`` and
        ``'f1_scores'``, each mapping to a list with one float per fold.
    """
    # Hyper-parameters handed to MLPClassifier for every fold.
    mlp_params = {
        'hidden_layer_sizes': (10, 10),  # two hidden layers of 10 units
        'activation': 'relu',            # activation function
        'solver': 'adam',                # optimiser
        'alpha': 0.0001,                 # L2 regularisation strength
        'batch_size': 'auto',            # mini-batch size
        'learning_rate': 'constant',     # learning-rate schedule
        'learning_rate_init': 0.001,     # initial learning rate
        'max_iter': 200,                 # maximum number of iterations
        'shuffle': True,                 # shuffle samples each iteration
        'random_state': 42,              # seed for reproducibility
    }

    print("\n=== BP神经网络参数 ===")
    for param, value in mlp_params.items():
        print(f"{param}: {value}")

    kf = KFold(n_splits=5, shuffle=True, random_state=42)

    # Per-fold metric values, appended in fold order.
    accuracies = []
    precisions = []
    recalls = []
    f1_scores = []

    for fold, (train_index, test_index) in enumerate(kf.split(X), start=1):
        print(f"\n--- 第 {fold} 折交叉验证 ---")

        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]

        # Fit the scaler on the training split only so no statistics of the
        # held-out split leak into training.
        scaler = StandardScaler()
        X_train_scaled = scaler.fit_transform(X_train)
        X_test_scaled = scaler.transform(X_test)

        mlp = MLPClassifier(**mlp_params)
        mlp.fit(X_train_scaled, y_train)

        y_pred = mlp.predict(X_test_scaled)

        # zero_division=0 yields the same numbers as the default but without
        # an UndefinedMetricWarning when a class is absent from a fold.
        accuracy = accuracy_score(y_test, y_pred)
        precision = precision_score(
            y_test, y_pred, average='weighted', zero_division=0)
        recall = recall_score(
            y_test, y_pred, average='weighted', zero_division=0)
        f1 = f1_score(y_test, y_pred, average='weighted', zero_division=0)

        accuracies.append(accuracy)
        precisions.append(precision)
        recalls.append(recall)
        f1_scores.append(f1)

        print(f"准确率: {accuracy:.4f}")
        print(f"精确率: {precision:.4f}")
        print(f"召回率: {recall:.4f}")
        print(f"F1值: {f1:.4f}")

    return {
        'accuracies': accuracies,
        'precisions': precisions,
        'recalls': recalls,
        'f1_scores': f1_scores,
    }

# 3. 分析和可视化结果

def analyze_results(results):
    """Print mean/std of the cross-validation metrics and save two plots.

    Writes a per-fold line chart to ``cross_validation_results.png`` and a
    box plot of each metric to ``performance_boxplot.png`` in the current
    working directory.

    Args:
        results: Dict with keys ``'accuracies'``, ``'precisions'``,
            ``'recalls'`` and ``'f1_scores'``, each a sequence holding one
            score per fold.
    """
    metrics = ['准确率', '精确率', '召回率', 'F1值']
    results_list = [
        results['accuracies'],
        results['precisions'],
        results['recalls'],
        results['f1_scores'],
    ]

    print("\n=== 五折交叉验证平均性能 ===")
    for metric_name, metric_values in zip(metrics, results_list):
        print(f"平均{metric_name}: {np.mean(metric_values):.4f} "
              f"± {np.std(metric_values):.4f}")

    # Derive the x axis from the data rather than assuming exactly 5 folds.
    folds = range(1, len(results['accuracies']) + 1)

    # Keep the original 0.9 floor when every score fits, but lower it when a
    # fold scored below 0.9 so that point is not clipped out of the plot.
    lowest = min(
        (value for values in results_list for value in values), default=0.9)
    y_min = min(0.9, lowest - 0.02)

    # Line chart: one line per metric, one point per fold.
    line_fig = plt.figure(figsize=(12, 8))

    for metric_name, metric_values in zip(metrics, results_list):
        plt.plot(folds, metric_values, marker='o', linewidth=2,
                 label=metric_name)

        # Annotate every point with its value, slightly above the marker.
        for fold, value in zip(folds, metric_values):
            plt.text(fold, value + 0.005, f'{value:.4f}', ha='center')

    plt.title('五折交叉验证各指标性能对比', fontsize=14)
    plt.xlabel('折数', fontsize=12)
    plt.ylabel('性能值', fontsize=12)
    plt.ylim(y_min, 1.01)
    plt.xticks(folds)
    plt.grid(True, linestyle='--', alpha=0.7)
    plt.legend(fontsize=12)
    plt.tight_layout()
    plt.savefig('cross_validation_results.png', dpi=300, bbox_inches='tight')
    plt.close(line_fig)
    print("\n交叉验证结果图已保存为 'cross_validation_results.png'")

    # Box plot: distribution of each metric across the folds.
    box_fig = plt.figure(figsize=(10, 6))
    plt.boxplot(results_list)
    # Label the boxes through xticks: the ``labels`` keyword of boxplot was
    # renamed to ``tick_labels`` in Matplotlib 3.9, so setting the ticks
    # directly works on both older and newer versions. Boxes sit at 1..N.
    plt.xticks(range(1, len(metrics) + 1), metrics)
    plt.title('各性能指标分布箱线图', fontsize=14)
    plt.ylabel('性能值', fontsize=12)
    plt.ylim(y_min, 1.01)
    plt.grid(True, linestyle='--', alpha=0.7)
    plt.tight_layout()
    plt.savefig('performance_boxplot.png', dpi=300, bbox_inches='tight')
    plt.close(box_fig)
    print("各性能指标箱线图已保存为 'performance_boxplot.png'")

# 4. 主函数

def main():
    """Run the experiment: analyse the data, cross-validate, report results.

    Calls ``load_and_analyze_data``, ``train_with_cross_validation`` and
    ``analyze_results`` in order, then prints the list of generated figures.
    """
    print("=== 上机实验五:BP神经网络算法实现与测试 ===")
    print("=" * 50)

    # Load the dataset and produce the exploratory output.
    X, y, target_names = load_and_analyze_data()

    # Train and score the model with 5-fold cross-validation.
    results = train_with_cross_validation(X, y, target_names)

    # Summarise and plot the per-fold metrics.
    analyze_results(results)

    print("\n=== 实验完成 ===")
    print("实验报告所需图表已生成,包括:")
    print("1. 特征分布直方图 (feature_distribution.png)")
    print("2. 交叉验证结果对比图 (cross_validation_results.png)")
    print("3. 性能指标箱线图 (performance_boxplot.png)")

# Run the experiment only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()

posted @ 2025-12-29 00:01  ysd666  阅读(8)  评论(0)    收藏  举报