
Machine Learning Experiment 5
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.neural_network import MLPClassifier

1. Load the dataset and analyze it

print("=== 1. 数据集加载与分析 ===")
iris = load_iris()
X, y = iris.data, iris.target

print(f"数据集形状: X={X.shape}, y={y.shape}")
print(f"特征名称: {iris.feature_names}")
print(f"类别名称: {iris.target_names}")
print(f"类别分布: {np.bincount(y)}")

Standardize the data

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
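
As a quick sanity check (not part of the original script), the scaled features can be inspected to confirm that each column now has roughly zero mean and unit standard deviation; the two print lines below are purely illustrative:

# Illustrative check: after StandardScaler, each feature should have mean ~0 and std ~1
print("Per-feature mean after scaling:", np.round(X_scaled.mean(axis=0), 4))
print("Per-feature std after scaling:", np.round(X_scaled.std(axis=0), 4))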

2. Custom BP (backpropagation) neural network implementation

class BPNeuralNetwork:
    def __init__(self, input_size, hidden_size, output_size, learning_rate=0.1, epochs=1000):
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.learning_rate = learning_rate
        self.epochs = epochs

        # Initialize weights and biases
        self.W1 = np.random.randn(input_size, hidden_size) * 0.01
        self.b1 = np.zeros((1, hidden_size))
        self.W2 = np.random.randn(hidden_size, output_size) * 0.01
        self.b2 = np.zeros((1, output_size))

    def sigmoid(self, x):
        return 1 / (1 + np.exp(-x))

    def sigmoid_derivative(self, x):
        # x is assumed to already be a sigmoid activation, so the derivative is x * (1 - x)
        return x * (1 - x)

    def softmax(self, x):
        exp_x = np.exp(x - np.max(x, axis=1, keepdims=True))
        return exp_x / np.sum(exp_x, axis=1, keepdims=True)

    def forward(self, X):
        # Forward pass: input -> sigmoid hidden layer -> softmax output layer
        self.z1 = np.dot(X, self.W1) + self.b1
        self.a1 = self.sigmoid(self.z1)
        self.z2 = np.dot(self.a1, self.W2) + self.b2
        self.a2 = self.softmax(self.z2)
        return self.a2

    def backward(self, X, y):
        # Backward pass (gradient of softmax output with cross-entropy loss)
        m = X.shape[0]

        # Output-layer error: predicted probabilities minus one-hot targets
        y_onehot = np.zeros((m, self.output_size))
        y_onehot[np.arange(m), y] = 1
        error_output = self.a2 - y_onehot

        # Hidden-layer error
        error_hidden = np.dot(error_output, self.W2.T) * self.sigmoid_derivative(self.a1)

        # Gradients for weights and biases
        dW2 = np.dot(self.a1.T, error_output) / m
        db2 = np.sum(error_output, axis=0, keepdims=True) / m
        dW1 = np.dot(X.T, error_hidden) / m
        db1 = np.sum(error_hidden, axis=0, keepdims=True) / m

        # Gradient-descent update
        self.W2 -= self.learning_rate * dW2
        self.b2 -= self.learning_rate * db2
        self.W1 -= self.learning_rate * dW1
        self.b1 -= self.learning_rate * db1

    def fit(self, X, y):
        # Re-initialize the parameters so each call to fit() trains from scratch
        # (important when the same instance is reused across cross-validation folds)
        self.W1 = np.random.randn(self.input_size, self.hidden_size) * 0.01
        self.b1 = np.zeros((1, self.hidden_size))
        self.W2 = np.random.randn(self.hidden_size, self.output_size) * 0.01
        self.b2 = np.zeros((1, self.output_size))
        for epoch in range(self.epochs):
            self.forward(X)
            self.backward(X, y)

    def predict(self, X):
        return np.argmax(self.forward(X), axis=1)
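
Before moving on to cross-validation, a minimal usage sketch of the class above may help; the names demo_net and demo_acc are hypothetical, and the snippet only reports training accuracy on the full standardized dataset rather than a proper evaluation:

# Minimal usage sketch (illustrative, not part of the experiment)
demo_net = BPNeuralNetwork(input_size=X_scaled.shape[1], hidden_size=10,
                           output_size=len(np.unique(y)), learning_rate=0.1, epochs=2000)
demo_net.fit(X_scaled, y)
demo_acc = accuracy_score(y, demo_net.predict(X_scaled))
print(f"Demo training accuracy of the custom BP network: {demo_acc:.4f}")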

3. Five-fold cross-validation function

def cross_validate(model, X, y, k=5):
    kf = KFold(n_splits=k, shuffle=True, random_state=42)
    metrics = {
        'accuracy': [],
        'precision': [],
        'recall': [],
        'f1': []
    }

    for train_index, test_index in kf.split(X):
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]

        # Train the model on the training fold
        model.fit(X_train, y_train)

        # Predict on the held-out fold
        y_pred = model.predict(X_test)

        # Compute macro-averaged metrics for the three-class problem
        metrics['accuracy'].append(accuracy_score(y_test, y_pred))
        metrics['precision'].append(precision_score(y_test, y_pred, average='macro'))
        metrics['recall'].append(recall_score(y_test, y_pred, average='macro'))
        metrics['f1'].append(f1_score(y_test, y_pred, average='macro'))

    return metrics
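
For scikit-learn estimators alone, a similar per-fold score could also be obtained with the library's built-in cross_val_score helper; the sketch below (the function name sklearn_cv_accuracy is hypothetical) is shown only for comparison and is not used by the rest of the script:

# Illustrative alternative for sklearn estimators only: built-in cross-validation helper
from sklearn.model_selection import cross_val_score

def sklearn_cv_accuracy(estimator, X, y, k=5):
    # Same shuffled 5-fold split strategy as cross_validate above, accuracy only
    cv = KFold(n_splits=k, shuffle=True, random_state=42)
    return cross_val_score(estimator, X, y, cv=cv, scoring='accuracy')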

4. Train and test the custom BP neural network

print("\n=== 2. 自定义BP神经网络训练与测试 ===")
input_size = X_scaled.shape[1]
hidden_size = 10
output_size = len(np.unique(y))

custom_bp = BPNeuralNetwork(input_size, hidden_size, output_size, learning_rate=0.1, epochs=2000)
custom_metrics = cross_validate(custom_bp, X_scaled, y, k=5)

print("自定义BP神经网络五折交叉验证结果:")
for metric, values in custom_metrics.items():
print(f"{metric}: 平均={np.mean(values):.4f}, 标准差={np.std(values):.4f}")
for i, v in enumerate(values):
print(f" 第{i+1}折: {v:.4f}")

5. Train and test scikit-learn's MLPClassifier

print("\n=== 3. scikit-learn MLPClassifier训练与测试 ===")
from sklearn.neural_network import MLPClassifier

MLPClassifier parameter notes:

- hidden_layer_sizes: number of neurons in each hidden layer; (10,) means a single hidden layer with 10 neurons
- activation: activation function; 'relu' selects the ReLU function
- solver: optimizer; 'adam' selects the Adam optimization algorithm
- learning_rate_init: initial learning rate, here 0.01
- max_iter: maximum number of training iterations, here 2000
- random_state: random seed, so the results are reproducible

sklearn_bp = MLPClassifier(
    hidden_layer_sizes=(10,),
    activation='relu',
    solver='adam',
    learning_rate_init=0.01,
    max_iter=2000,
    random_state=42
)

sklearn_metrics = cross_validate(sklearn_bp, X_scaled, y, k=5)

print("scikit-learn MLPClassifier五折交叉验证结果:")
for metric, values in sklearn_metrics.items():
print(f"{metric}: 平均={np.mean(values):.4f}, 标准差={np.std(values):.4f}")
for i, v in enumerate(values):
print(f" 第{i+1}折: {v:.4f}")

6. Comparison of results

print("\n=== 4. 模型性能比较分析 ===")
print("自定义BP神经网络 vs scikit-learn MLPClassifier:")
for metric in ['accuracy', 'precision', 'recall', 'f1']:
custom_mean = np.mean(custom_metrics[metric])
sklearn_mean = np.mean(sklearn_metrics[metric])
print(f"{metric}: 自定义={custom_mean:.4f}, scikit-learn={sklearn_mean:.4f}, 差值={abs(custom_mean - sklearn_mean):.4f}")

print("\n结论:")
if np.mean(custom_metrics['accuracy']) > np.mean(sklearn_metrics['accuracy']):
print("自定义BP神经网络在准确率上表现更好。")
else:
print("scikit-learn MLPClassifier在准确率上表现更好。")

print("\n实验完成!")
