# 12.19
# Machine Learning Lab 5
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.neural_network import MLPClassifier
# 1. Load the data set and print a short summary of it
print("=== 1. 数据集加载与分析 ===")
iris = load_iris()
X, y = iris.data, iris.target
print(f"数据集形状: X={X.shape}, y={y.shape}")
print(f"特征名称: {iris.feature_names}")
print(f"类别名称: {iris.target_names}")
# np.bincount counts how many samples carry each integer class label.
print(f"类别分布: {np.bincount(y)}")

# Standardize the features.
# NOTE(review): the scaler is fit on the full data set before the
# cross-validation below splits it, so the held-out folds contribute to the
# scaling statistics. Consider fitting the scaler inside each fold.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
# 2. Custom BP (back-propagation) neural network implementation
class BPNeuralNetwork:
    """One-hidden-layer classifier trained with full-batch gradient descent.

    The hidden layer uses a sigmoid activation and the output layer a softmax.
    The backward pass takes ``softmax_output - one_hot(y)`` as the output-layer
    error and updates all parameters once per epoch on the whole batch.

    Args:
        input_size: Number of input features.
        hidden_size: Number of hidden units.
        output_size: Number of classes (width of the softmax output).
        learning_rate: Step size of each gradient-descent update.
        epochs: Number of full-batch forward/backward passes run by ``fit``.
    """

    def __init__(self, input_size, hidden_size, output_size, learning_rate=0.1, epochs=1000):
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.learning_rate = learning_rate
        self.epochs = epochs
        self._init_parameters()

    def _init_parameters(self):
        """Draw fresh small random weights and zero the biases."""
        self.W1 = np.random.randn(self.input_size, self.hidden_size) * 0.01
        self.b1 = np.zeros((1, self.hidden_size))
        self.W2 = np.random.randn(self.hidden_size, self.output_size) * 0.01
        self.b2 = np.zeros((1, self.output_size))

    def sigmoid(self, x):
        """Return the element-wise logistic function of ``x``."""
        return 1 / (1 + np.exp(-x))

    def sigmoid_derivative(self, x):
        """Return the sigmoid derivative given the sigmoid *output* ``x``.

        The argument must already be an activation (``sigmoid(z)``), not the
        pre-activation ``z``; ``backward`` passes ``self.a1`` accordingly.
        """
        return x * (1 - x)

    def softmax(self, x):
        """Return the row-wise softmax of the 2-D array ``x``."""
        # Subtracting the row maximum keeps np.exp from overflowing and does
        # not change the result.
        exp_x = np.exp(x - np.max(x, axis=1, keepdims=True))
        return exp_x / np.sum(exp_x, axis=1, keepdims=True)

    def forward(self, X):
        """Run the forward pass and return the class probabilities.

        The intermediate values ``z1``, ``a1``, ``z2`` and ``a2`` are stored on
        the instance because ``backward`` reads them.
        """
        self.z1 = np.dot(X, self.W1) + self.b1
        self.a1 = self.sigmoid(self.z1)
        self.z2 = np.dot(self.a1, self.W2) + self.b2
        self.a2 = self.softmax(self.z2)
        return self.a2

    def backward(self, X, y):
        """Apply one gradient-descent update using the last ``forward`` pass.

        Args:
            X: The same input batch that was given to ``forward``.
            y: Integer class labels, used as column indices for the one-hot
                encoding.
        """
        m = X.shape[0]

        # Output-layer error: predicted probabilities minus one-hot targets.
        y_onehot = np.zeros((m, self.output_size))
        y_onehot[np.arange(m), y] = 1
        error_output = self.a2 - y_onehot

        # Hidden-layer error: propagate back through W2 and the sigmoid.
        error_hidden = np.dot(error_output, self.W2.T) * self.sigmoid_derivative(self.a1)

        # Gradients averaged over the batch.
        dW2 = np.dot(self.a1.T, error_output) / m
        db2 = np.sum(error_output, axis=0, keepdims=True) / m
        dW1 = np.dot(X.T, error_hidden) / m
        db1 = np.sum(error_hidden, axis=0, keepdims=True) / m

        self.W2 -= self.learning_rate * dW2
        self.b2 -= self.learning_rate * db2
        self.W1 -= self.learning_rate * dW1
        self.b1 -= self.learning_rate * db1

    def fit(self, X, y):
        """Train from scratch on ``(X, y)`` for ``self.epochs`` epochs.

        Parameters are re-initialised on every call so that refitting the same
        instance (for example once per cross-validation fold) does not carry
        over weights learned from a previous call's data.
        """
        self._init_parameters()
        for _ in range(self.epochs):
            self.forward(X)
            self.backward(X, y)

    def predict(self, X):
        """Return the index of the most probable class for each row of ``X``."""
        return np.argmax(self.forward(X), axis=1)
# 3. K-fold cross-validation helper (five folds by default)
def cross_validate(model, X, y, k=5):
    """Evaluate ``model`` with shuffled K-fold cross-validation.

    Each fold works on its own deep copy of ``model``, so an estimator whose
    ``fit`` continues from its current parameters cannot carry information
    from one fold's training data into another fold. The ``model`` object
    passed in is left untouched.

    Args:
        model: Object with a ``predict(X)`` method. If it also has
            ``fit(X, y)``, that is called on each fold's training split.
        X: Feature array, indexable by an integer index array.
        y: Label array, indexable by an integer index array.
        k: Number of folds.

    Returns:
        Dict with keys ``'accuracy'``, ``'precision'``, ``'recall'`` and
        ``'f1'``; each maps to a list with one score per fold. Precision,
        recall and F1 are macro-averaged.

    Raises:
        TypeError: If ``model`` has no ``predict`` method.
    """
    import copy  # local import so this function does not rely on a new top-level import

    if not hasattr(model, 'predict'):
        # Without this check the failure would surface later as a NameError
        # on the prediction variable.
        raise TypeError("model must provide a predict(X) method")

    kf = KFold(n_splits=k, shuffle=True, random_state=42)
    metrics = {
        'accuracy': [],
        'precision': [],
        'recall': [],
        'f1': [],
    }
    for train_index, test_index in kf.split(X):
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]

        # Fresh copy per fold keeps the folds independent of each other.
        fold_model = copy.deepcopy(model)
        if hasattr(fold_model, 'fit'):
            fold_model.fit(X_train, y_train)
        y_pred = fold_model.predict(X_test)

        metrics['accuracy'].append(accuracy_score(y_test, y_pred))
        metrics['precision'].append(precision_score(y_test, y_pred, average='macro'))
        metrics['recall'].append(recall_score(y_test, y_pred, average='macro'))
        metrics['f1'].append(f1_score(y_test, y_pred, average='macro'))
    return metrics
# 4. Train and evaluate the custom BP neural network
print("\n=== 2. 自定义BP神经网络训练与测试 ===")
input_size = X_scaled.shape[1]
hidden_size = 10
# One output unit per distinct class label.
output_size = len(np.unique(y))
custom_bp = BPNeuralNetwork(input_size, hidden_size, output_size, learning_rate=0.1, epochs=2000)
custom_metrics = cross_validate(custom_bp, X_scaled, y, k=5)
print("自定义BP神经网络五折交叉验证结果:")
# For every metric print the mean and standard deviation, then each fold.
for metric, values in custom_metrics.items():
    print(f"{metric}: 平均={np.mean(values):.4f}, 标准差={np.std(values):.4f}")
    for i, v in enumerate(values):
        print(f" 第{i+1}折: {v:.4f}")
# 5. Train and evaluate scikit-learn's MLPClassifier
print("\n=== 3. scikit-learn MLPClassifier训练与测试 ===")
from sklearn.neural_network import MLPClassifier
# MLPClassifier parameters used below:
# - hidden_layer_sizes: hidden-layer widths; (10,) means one hidden layer with 10 neurons
# - activation: activation function; 'relu' selects ReLU
# - solver: optimizer; 'adam' selects the Adam algorithm
# - learning_rate_init: initial learning rate, 0.01
# - max_iter: maximum number of iterations, 2000
# - random_state: random seed, so results are reproducible
sklearn_bp = MLPClassifier(
    hidden_layer_sizes=(10,),
    activation='relu',
    solver='adam',
    learning_rate_init=0.01,
    max_iter=2000,
    random_state=42
)
sklearn_metrics = cross_validate(sklearn_bp, X_scaled, y, k=5)
print("scikit-learn MLPClassifier五折交叉验证结果:")
# For every metric print the mean and standard deviation, then each fold.
for metric, values in sklearn_metrics.items():
    print(f"{metric}: 平均={np.mean(values):.4f}, 标准差={np.std(values):.4f}")
    for i, v in enumerate(values):
        print(f" 第{i+1}折: {v:.4f}")
# 6. Compare the two models
print("\n=== 4. 模型性能比较分析 ===")
print("自定义BP神经网络 vs scikit-learn MLPClassifier:")
# Per metric: mean over folds for each model and the absolute difference.
for metric in ['accuracy', 'precision', 'recall', 'f1']:
    custom_mean = np.mean(custom_metrics[metric])
    sklearn_mean = np.mean(sklearn_metrics[metric])
    print(f"{metric}: 自定义={custom_mean:.4f}, scikit-learn={sklearn_mean:.4f}, 差值={abs(custom_mean - sklearn_mean):.4f}")
print("\n结论:")
# The custom network is reported as better only when its mean accuracy is
# strictly higher; a tie falls into the else branch.
if np.mean(custom_metrics['accuracy']) > np.mean(sklearn_metrics['accuracy']):
    print("自定义BP神经网络在准确率上表现更好。")
else:
    print("scikit-learn MLPClassifier在准确率上表现更好。")
print("\n实验完成!")

# 浙公网安备 33010602011771号  (web-page footer text captured when the script was copied; not part of the program)