# 2025.11.14上机实验五:BP 神经网络算法实现与测试
# BP神经网络算法实现与测试
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.model_selection import KFold
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.preprocessing import StandardScaler
# 设置中文显示
# Configure matplotlib so the Chinese titles and axis labels used below render.
# 'font.sans-serif' is an ordered preference list: SimHei stays first, so the
# output is unchanged wherever it is installed, and the remaining entries are
# fallbacks for systems where SimHei is missing.
plt.rcParams['font.sans-serif'] = [
    'SimHei',
    'Microsoft YaHei',
    'Noto Sans CJK SC',
    'WenQuanYi Micro Hei',
    'Arial Unicode MS',
    'DejaVu Sans',
]
# Draw the minus sign as an ASCII hyphen instead of U+2212, which the CJK
# fonts above may not provide.
plt.rcParams['axes.unicode_minus'] = False
# 1. 加载数据集并进行数据分析
def load_and_analyze_data():
    """Load the iris dataset, print a summary and save per-feature histograms.

    Prints the frame shape, the first five rows, ``describe()`` statistics and
    the number of samples per class, then writes a 2x2 grid of per-class
    histograms (one panel per feature) to ``feature_distribution.png`` in the
    current working directory.

    Returns:
        tuple: ``(iris.data, iris.target, iris.target_names)`` as provided by
        ``load_iris()``.
    """
    iris = load_iris()

    # A DataFrame makes the summary statistics and per-class filtering easy.
    df = pd.DataFrame(data=iris.data, columns=iris.feature_names)
    df['target'] = iris.target
    # Take the class names from the dataset itself rather than a hand-written
    # map, so the column always agrees with the legend labels used below.
    label_by_code = {code: str(label) for code, label in enumerate(iris.target_names)}
    df['target_name'] = df['target'].map(label_by_code)

    print("=== 数据集基本信息 ===")
    print(f"数据集大小: {df.shape}")
    print("\n数据集前5行:")
    print(df.head())
    print("\n=== 数据集统计信息 ===")
    print(df.describe())
    print("\n=== 各类别分布 ===")
    print(df['target_name'].value_counts())

    # One panel per feature; axes.flat walks the 2x2 grid row by row, which is
    # the same placement as indexing with (i // 2, i % 2).
    fig, axes = plt.subplots(2, 2, figsize=(12, 10))
    for ax, feature in zip(axes.flat, iris.feature_names):
        for target, color in zip([0, 1, 2], ['r', 'g', 'b']):
            ax.hist(
                df.loc[df['target'] == target, feature],
                bins=15,
                alpha=0.5,
                label=iris.target_names[target],
                color=color,
            )
        ax.set_title(feature)
        ax.set_xlabel(feature)
        ax.set_ylabel('频数')
        ax.legend()

    fig.tight_layout()
    fig.savefig('feature_distribution.png', dpi=300, bbox_inches='tight')
    # The figure is only needed for the saved file; close it so pyplot does not
    # keep it alive for the rest of the run.
    plt.close(fig)
    print("\n特征分布直方图已保存为 'feature_distribution.png'")

    return iris.data, iris.target, iris.target_names
# 2. 实现五折交叉验证并训练模型
def train_with_cross_validation(X, y, target_names):
    """Train and score an MLP classifier with shuffled 5-fold cross-validation.

    For every fold a ``StandardScaler`` is fitted on the training split only
    and applied to both splits, a fresh ``MLPClassifier`` is trained, and
    accuracy plus weighted precision / recall / F1 are computed on the
    held-out split and printed.

    Args:
        X: Feature matrix; indexed with the integer index arrays produced by
            ``KFold.split``.
        y: Label vector; indexed the same way as ``X``.
        target_names: Class names. Accepted but not used by this function.

    Returns:
        dict: Keys ``'accuracies'``, ``'precisions'``, ``'recalls'`` and
        ``'f1_scores'``, each mapping to a list with one score per fold.
    """
    # Network hyper-parameters; key order matters because it is the print order.
    mlp_params = dict(
        hidden_layer_sizes=(10, 10),   # two hidden layers of 10 units
        activation='relu',             # activation function
        solver='adam',                 # optimiser
        alpha=0.0001,                  # L2 regularisation strength
        batch_size='auto',             # mini-batch size
        learning_rate='constant',      # learning-rate schedule
        learning_rate_init=0.001,      # initial learning rate
        max_iter=200,                  # maximum number of iterations
        shuffle=True,                  # shuffle samples each iteration
        random_state=42,               # seed for reproducibility
    )

    print("\n=== BP神经网络参数 ===")
    for name, setting in mlp_params.items():
        print(f"{name}: {setting}")

    splitter = KFold(n_splits=5, shuffle=True, random_state=42)

    # One list per metric, filled fold by fold.
    scores = {
        'accuracies': [],
        'precisions': [],
        'recalls': [],
        'f1_scores': [],
    }

    for fold_no, (train_idx, test_idx) in enumerate(splitter.split(X), start=1):
        print(f"\n--- 第 {fold_no} 折交叉验证 ---")

        train_labels, test_labels = y[train_idx], y[test_idx]

        # Fit the scaler on the training split only, then reuse it for the test split.
        scaler = StandardScaler()
        train_features = scaler.fit_transform(X[train_idx])
        test_features = scaler.transform(X[test_idx])

        model = MLPClassifier(**mlp_params)
        model.fit(train_features, train_labels)
        predicted = model.predict(test_features)

        fold_accuracy = accuracy_score(test_labels, predicted)
        fold_precision = precision_score(test_labels, predicted, average='weighted')
        fold_recall = recall_score(test_labels, predicted, average='weighted')
        fold_f1 = f1_score(test_labels, predicted, average='weighted')

        scores['accuracies'].append(fold_accuracy)
        scores['precisions'].append(fold_precision)
        scores['recalls'].append(fold_recall)
        scores['f1_scores'].append(fold_f1)

        print(f"准确率: {fold_accuracy:.4f}")
        print(f"精确率: {fold_precision:.4f}")
        print(f"召回率: {fold_recall:.4f}")
        print(f"F1值: {fold_f1:.4f}")

    return scores
# 3. 分析和可视化结果
def analyze_results(results):
    """Print mean ± std of each metric and save a line chart and a box plot.

    Args:
        results: Mapping with the keys ``'accuracies'``, ``'precisions'``,
            ``'recalls'`` and ``'f1_scores'``, each a sequence holding one
            score per cross-validation fold.

    Side effects:
        Prints the summary to stdout and writes
        ``cross_validation_results.png`` and ``performance_boxplot.png`` to
        the current working directory.
    """
    print("\n=== 五折交叉验证平均性能 ===")
    for label, key in (
        ('平均准确率', 'accuracies'),
        ('平均精确率', 'precisions'),
        ('平均召回率', 'recalls'),
        ('平均F1值', 'f1_scores'),
    ):
        print(f"{label}: {np.mean(results[key]):.4f} ± {np.std(results[key]):.4f}")

    metrics = ['准确率', '精确率', '召回率', 'F1值']
    results_list = [
        results['accuracies'],
        results['precisions'],
        results['recalls'],
        results['f1_scores'],
    ]

    # Derive the fold numbers from the data instead of hard-coding five folds,
    # so the plot still works when the caller used a different split count.
    folds = range(1, len(results['accuracies']) + 1)

    # Keep the 0.9 lower bound when every score fits inside it; otherwise widen
    # the axis so that low scores are not silently cut out of the chart.
    lowest = min((v for values in results_list for v in values), default=0.9)
    y_min = 0.9 if lowest >= 0.9 else max(0.0, lowest - 0.02)

    # Line chart: one line per metric, one point per fold.
    line_fig = plt.figure(figsize=(12, 8))
    for metric_name, metric_values in zip(metrics, results_list):
        plt.plot(folds, metric_values, marker='o', linewidth=2, label=metric_name)
        # Annotate each point with its value, slightly above the marker.
        for fold_no, value in zip(folds, metric_values):
            plt.text(fold_no, value + 0.005, f'{value:.4f}', ha='center')
    plt.title('五折交叉验证各指标性能对比', fontsize=14)
    plt.xlabel('折数', fontsize=12)
    plt.ylabel('性能值', fontsize=12)
    plt.ylim(y_min, 1.01)
    plt.xticks(folds)
    plt.grid(True, linestyle='--', alpha=0.7)
    plt.legend(fontsize=12)
    plt.tight_layout()
    plt.savefig('cross_validation_results.png', dpi=300, bbox_inches='tight')
    # Only the saved file is needed; release the figure.
    plt.close(line_fig)
    print("\n交叉验证结果图已保存为 'cross_validation_results.png'")

    # Box plot: distribution of each metric across the folds.
    box_fig = plt.figure(figsize=(10, 6))
    plt.boxplot(results_list)
    # boxplot's `labels=` keyword was renamed to `tick_labels` in Matplotlib 3.9
    # and the old name is deprecated. Labelling the ticks explicitly (boxes sit
    # at positions 1..N by default) works on both old and new releases.
    plt.xticks(range(1, len(metrics) + 1), metrics)
    plt.title('各性能指标分布箱线图', fontsize=14)
    plt.ylabel('性能值', fontsize=12)
    plt.ylim(y_min, 1.01)
    plt.grid(True, linestyle='--', alpha=0.7)
    plt.tight_layout()
    plt.savefig('performance_boxplot.png', dpi=300, bbox_inches='tight')
    plt.close(box_fig)
    print("各性能指标箱线图已保存为 'performance_boxplot.png'")
# 4. 主函数
def main():
    """Run the experiment end to end: data analysis, CV training, reporting."""
    print("=== 上机实验五:BP神经网络算法实现与测试 ===")
    print("=" * 50)

    # Step 1: load the data and produce the exploratory output.
    features, labels, class_names = load_and_analyze_data()

    # Step 2: train and evaluate with five-fold cross-validation.
    cv_scores = train_with_cross_validation(features, labels, class_names)

    # Step 3: summarise and plot the per-fold scores.
    analyze_results(cv_scores)

    closing_lines = (
        "\n=== 实验完成 ===",
        "实验报告所需图表已生成,包括:",
        "1. 特征分布直方图 (feature_distribution.png)",
        "2. 交叉验证结果对比图 (cross_validation_results.png)",
        "3. 性能指标箱线图 (performance_boxplot.png)",
    )
    for line in closing_lines:
        print(line)
# Run the experiment only when this file is executed as a script, not when it
# is imported. The guard must compare the dunder names: a bare `name` is an
# undefined variable and raises NameError.
if __name__ == "__main__":
    main()
# BP神经网络算法实现与测试
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.model_selection import KFold
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.preprocessing import StandardScaler
# 设置中文显示
# Configure matplotlib so the Chinese titles and axis labels used below render.
# 'font.sans-serif' is an ordered preference list: SimHei stays first, so the
# output is unchanged wherever it is installed, and the remaining entries are
# fallbacks for systems where SimHei is missing.
plt.rcParams['font.sans-serif'] = [
    'SimHei',
    'Microsoft YaHei',
    'Noto Sans CJK SC',
    'WenQuanYi Micro Hei',
    'Arial Unicode MS',
    'DejaVu Sans',
]
# Draw the minus sign as an ASCII hyphen instead of U+2212, which the CJK
# fonts above may not provide.
plt.rcParams['axes.unicode_minus'] = False
# 1. 加载数据集并进行数据分析
def load_and_analyze_data():
    """Load the iris dataset, print a summary and save per-feature histograms.

    Prints the frame shape, the first five rows, ``describe()`` statistics and
    the number of samples per class, then writes a 2x2 grid of per-class
    histograms (one panel per feature) to ``feature_distribution.png`` in the
    current working directory.

    Returns:
        tuple: ``(iris.data, iris.target, iris.target_names)`` as provided by
        ``load_iris()``.
    """
    iris = load_iris()

    # A DataFrame makes the summary statistics and per-class filtering easy.
    df = pd.DataFrame(data=iris.data, columns=iris.feature_names)
    df['target'] = iris.target
    # Take the class names from the dataset itself rather than a hand-written
    # map, so the column always agrees with the legend labels used below.
    label_by_code = {code: str(label) for code, label in enumerate(iris.target_names)}
    df['target_name'] = df['target'].map(label_by_code)

    print("=== 数据集基本信息 ===")
    print(f"数据集大小: {df.shape}")
    print("\n数据集前5行:")
    print(df.head())
    print("\n=== 数据集统计信息 ===")
    print(df.describe())
    print("\n=== 各类别分布 ===")
    print(df['target_name'].value_counts())

    # One panel per feature; axes.flat walks the 2x2 grid row by row, which is
    # the same placement as indexing with (i // 2, i % 2).
    fig, axes = plt.subplots(2, 2, figsize=(12, 10))
    for ax, feature in zip(axes.flat, iris.feature_names):
        for target, color in zip([0, 1, 2], ['r', 'g', 'b']):
            ax.hist(
                df.loc[df['target'] == target, feature],
                bins=15,
                alpha=0.5,
                label=iris.target_names[target],
                color=color,
            )
        ax.set_title(feature)
        ax.set_xlabel(feature)
        ax.set_ylabel('频数')
        ax.legend()

    fig.tight_layout()
    fig.savefig('feature_distribution.png', dpi=300, bbox_inches='tight')
    # The figure is only needed for the saved file; close it so pyplot does not
    # keep it alive for the rest of the run.
    plt.close(fig)
    print("\n特征分布直方图已保存为 'feature_distribution.png'")

    return iris.data, iris.target, iris.target_names
# 2. 实现五折交叉验证并训练模型
def train_with_cross_validation(X, y, target_names):
    """Train and score an MLP classifier with shuffled 5-fold cross-validation.

    For every fold a ``StandardScaler`` is fitted on the training split only
    and applied to both splits, a fresh ``MLPClassifier`` is trained, and
    accuracy plus weighted precision / recall / F1 are computed on the
    held-out split and printed.

    Args:
        X: Feature matrix; indexed with the integer index arrays produced by
            ``KFold.split``.
        y: Label vector; indexed the same way as ``X``.
        target_names: Class names. Accepted but not used by this function.

    Returns:
        dict: Keys ``'accuracies'``, ``'precisions'``, ``'recalls'`` and
        ``'f1_scores'``, each mapping to a list with one score per fold.
    """
    # Network hyper-parameters; key order matters because it is the print order.
    mlp_params = dict(
        hidden_layer_sizes=(10, 10),   # two hidden layers of 10 units
        activation='relu',             # activation function
        solver='adam',                 # optimiser
        alpha=0.0001,                  # L2 regularisation strength
        batch_size='auto',             # mini-batch size
        learning_rate='constant',      # learning-rate schedule
        learning_rate_init=0.001,      # initial learning rate
        max_iter=200,                  # maximum number of iterations
        shuffle=True,                  # shuffle samples each iteration
        random_state=42,               # seed for reproducibility
    )

    print("\n=== BP神经网络参数 ===")
    for name, setting in mlp_params.items():
        print(f"{name}: {setting}")

    splitter = KFold(n_splits=5, shuffle=True, random_state=42)

    # One list per metric, filled fold by fold.
    scores = {
        'accuracies': [],
        'precisions': [],
        'recalls': [],
        'f1_scores': [],
    }

    for fold_no, (train_idx, test_idx) in enumerate(splitter.split(X), start=1):
        print(f"\n--- 第 {fold_no} 折交叉验证 ---")

        train_labels, test_labels = y[train_idx], y[test_idx]

        # Fit the scaler on the training split only, then reuse it for the test split.
        scaler = StandardScaler()
        train_features = scaler.fit_transform(X[train_idx])
        test_features = scaler.transform(X[test_idx])

        model = MLPClassifier(**mlp_params)
        model.fit(train_features, train_labels)
        predicted = model.predict(test_features)

        fold_accuracy = accuracy_score(test_labels, predicted)
        fold_precision = precision_score(test_labels, predicted, average='weighted')
        fold_recall = recall_score(test_labels, predicted, average='weighted')
        fold_f1 = f1_score(test_labels, predicted, average='weighted')

        scores['accuracies'].append(fold_accuracy)
        scores['precisions'].append(fold_precision)
        scores['recalls'].append(fold_recall)
        scores['f1_scores'].append(fold_f1)

        print(f"准确率: {fold_accuracy:.4f}")
        print(f"精确率: {fold_precision:.4f}")
        print(f"召回率: {fold_recall:.4f}")
        print(f"F1值: {fold_f1:.4f}")

    return scores
# 3. 分析和可视化结果
def analyze_results(results):
    """Print mean ± std of each metric and save a line chart and a box plot.

    Args:
        results: Mapping with the keys ``'accuracies'``, ``'precisions'``,
            ``'recalls'`` and ``'f1_scores'``, each a sequence holding one
            score per cross-validation fold.

    Side effects:
        Prints the summary to stdout and writes
        ``cross_validation_results.png`` and ``performance_boxplot.png`` to
        the current working directory.
    """
    print("\n=== 五折交叉验证平均性能 ===")
    for label, key in (
        ('平均准确率', 'accuracies'),
        ('平均精确率', 'precisions'),
        ('平均召回率', 'recalls'),
        ('平均F1值', 'f1_scores'),
    ):
        print(f"{label}: {np.mean(results[key]):.4f} ± {np.std(results[key]):.4f}")

    metrics = ['准确率', '精确率', '召回率', 'F1值']
    results_list = [
        results['accuracies'],
        results['precisions'],
        results['recalls'],
        results['f1_scores'],
    ]

    # Derive the fold numbers from the data instead of hard-coding five folds,
    # so the plot still works when the caller used a different split count.
    folds = range(1, len(results['accuracies']) + 1)

    # Keep the 0.9 lower bound when every score fits inside it; otherwise widen
    # the axis so that low scores are not silently cut out of the chart.
    lowest = min((v for values in results_list for v in values), default=0.9)
    y_min = 0.9 if lowest >= 0.9 else max(0.0, lowest - 0.02)

    # Line chart: one line per metric, one point per fold.
    line_fig = plt.figure(figsize=(12, 8))
    for metric_name, metric_values in zip(metrics, results_list):
        plt.plot(folds, metric_values, marker='o', linewidth=2, label=metric_name)
        # Annotate each point with its value, slightly above the marker.
        for fold_no, value in zip(folds, metric_values):
            plt.text(fold_no, value + 0.005, f'{value:.4f}', ha='center')
    plt.title('五折交叉验证各指标性能对比', fontsize=14)
    plt.xlabel('折数', fontsize=12)
    plt.ylabel('性能值', fontsize=12)
    plt.ylim(y_min, 1.01)
    plt.xticks(folds)
    plt.grid(True, linestyle='--', alpha=0.7)
    plt.legend(fontsize=12)
    plt.tight_layout()
    plt.savefig('cross_validation_results.png', dpi=300, bbox_inches='tight')
    # Only the saved file is needed; release the figure.
    plt.close(line_fig)
    print("\n交叉验证结果图已保存为 'cross_validation_results.png'")

    # Box plot: distribution of each metric across the folds.
    box_fig = plt.figure(figsize=(10, 6))
    plt.boxplot(results_list)
    # boxplot's `labels=` keyword was renamed to `tick_labels` in Matplotlib 3.9
    # and the old name is deprecated. Labelling the ticks explicitly (boxes sit
    # at positions 1..N by default) works on both old and new releases.
    plt.xticks(range(1, len(metrics) + 1), metrics)
    plt.title('各性能指标分布箱线图', fontsize=14)
    plt.ylabel('性能值', fontsize=12)
    plt.ylim(y_min, 1.01)
    plt.grid(True, linestyle='--', alpha=0.7)
    plt.tight_layout()
    plt.savefig('performance_boxplot.png', dpi=300, bbox_inches='tight')
    plt.close(box_fig)
    print("各性能指标箱线图已保存为 'performance_boxplot.png'")
# 4. 主函数
def main():
    """Run the experiment end to end: data analysis, CV training, reporting."""
    print("=== 上机实验五:BP神经网络算法实现与测试 ===")
    print("=" * 50)

    # Step 1: load the data and produce the exploratory output.
    features, labels, class_names = load_and_analyze_data()

    # Step 2: train and evaluate with five-fold cross-validation.
    cv_scores = train_with_cross_validation(features, labels, class_names)

    # Step 3: summarise and plot the per-fold scores.
    analyze_results(cv_scores)

    closing_lines = (
        "\n=== 实验完成 ===",
        "实验报告所需图表已生成,包括:",
        "1. 特征分布直方图 (feature_distribution.png)",
        "2. 交叉验证结果对比图 (cross_validation_results.png)",
        "3. 性能指标箱线图 (performance_boxplot.png)",
    )
    for line in closing_lines:
        print(line)
# Run the experiment only when this file is executed as a script, not when it
# is imported. The guard must compare the dunder names: a bare `name` is an
# undefined variable and raises NameError.
# NOTE(review): this file contains the whole script twice, each copy ending in
# its own entry-point guard, so the experiment is executed once per copy —
# consider removing the duplicate copy.
if __name__ == "__main__":
    main()

# 浙公网安备 33010602011771号