12.10 BP Neural Network Pseudocode
The following is complete pseudocode for a BP (backpropagation) neural network, covering both the forward pass and the backward pass:
Initializing the network
import numpy as np

def initialize_network(input_size, hidden_layers, output_size):
    """
    Initialize the network parameters.
    Inputs:
        input_size: size of the input layer
        hidden_layers: list of hidden layer sizes [n_hidden1, n_hidden2, ...]
        output_size: size of the output layer
    Returns:
        network: dictionary of initialized network parameters
    """
    network = {}
    layer_sizes = [input_size] + hidden_layers + [output_size]
    for i in range(1, len(layer_sizes)):
        # Initialize the weight matrix (Xavier/Glorot initialization)
        network[f'W{i}'] = np.random.randn(layer_sizes[i], layer_sizes[i-1]) * np.sqrt(2.0 / (layer_sizes[i-1] + layer_sizes[i]))
        # Initialize the bias vector
        network[f'b{i}'] = np.zeros((layer_sizes[i], 1))
    return network
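As a quick sanity check (a minimal sketch; the layer sizes below are arbitrary), the parameter shapes produced by initialize_network can be inspected directly:

# Hypothetical example: 4 input features, two hidden layers (5 and 3 units), 2 output classes
net = initialize_network(4, [5, 3], 2)
for name, param in net.items():
    print(name, param.shape)
# W1 (5, 4), b1 (5, 1), W2 (3, 5), b2 (3, 1), W3 (2, 3), b3 (2, 1)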
Forward propagation
def forward_pass(network, X):
    """
    Run the forward pass.
    Inputs:
        network: network parameters
        X: input data, shape (n_features, n_samples)
    Returns:
        cache: dictionary holding each layer's pre-activations Z and activations A
    """
    cache = {}
    cache['A0'] = X  # input layer
    # Propagate through the hidden layers and the output layer
    L = len(network) // 2  # number of layers, excluding the input layer
    for l in range(1, L + 1):
        # Weighted input
        cache[f'Z{l}'] = np.dot(network[f'W{l}'], cache[f'A{l-1}']) + network[f'b{l}']
        # Activation
        if l == L:  # output layer
            cache[f'A{l}'] = softmax(cache[f'Z{l}'])  # softmax for multi-class classification
        else:  # hidden layers
            cache[f'A{l}'] = relu(cache[f'Z{l}'])  # ReLU in the hidden layers
    return cache
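A minimal usage sketch, assuming the small network from the initialization example above and random inputs (relu and softmax are defined further below): each activation A_l has shape (layer_size_l, n_samples), and every output column sums to 1 because of the softmax.

X = np.random.randn(4, 10)        # 4 features, 10 samples
cache = forward_pass(net, X)
print(cache['A3'].shape)          # (2, 10): one probability column per sample
print(cache['A3'].sum(axis=0))    # each column sums to 1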
Backpropagation
def backward_pass(network, cache, X, Y):
    """
    Run backpropagation to compute the gradients.
    Inputs:
        network: network parameters
        cache: forward-pass cache
        X: input data
        Y: true labels (one-hot)
    Returns:
        grads: dictionary of gradients for each layer
    """
    grads = {}
    m = X.shape[1]  # number of samples
    L = len(network) // 2  # number of layers, excluding the input layer
    # Output-layer error (softmax + cross-entropy gives dZ = A - Y)
    dZ = cache[f'A{L}'] - Y
    grads[f'dW{L}'] = (1/m) * np.dot(dZ, cache[f'A{L-1}'].T)
    grads[f'db{L}'] = (1/m) * np.sum(dZ, axis=1, keepdims=True)
    # Propagate the error back through the hidden layers
    for l in reversed(range(1, L)):
        # Error at the current layer
        dA = np.dot(network[f'W{l+1}'].T, dZ)
        dZ = dA * relu_derivative(cache[f'Z{l}'])
        # Gradients
        grads[f'dW{l}'] = (1/m) * np.dot(dZ, cache[f'A{l-1}'].T)
        grads[f'db{l}'] = (1/m) * np.sum(dZ, axis=1, keepdims=True)
    return grads
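A common way to verify these analytic gradients is a finite-difference check. The helper below is a hypothetical addition (not part of the original pseudocode); it perturbs one weight entry per layer and compares the numeric gradient against the output of backward_pass, using the cross_entropy_loss defined further below.

def gradient_check(network, X, Y, eps=1e-6):
    """Compare analytic gradients with central finite differences (slow; debugging only)."""
    L = len(network) // 2
    cache = forward_pass(network, X)
    grads = backward_pass(network, cache, X, Y)
    for l in range(1, L + 1):
        W = network[f'W{l}']
        # Check a single randomly chosen entry of each weight matrix
        i, j = np.random.randint(W.shape[0]), np.random.randint(W.shape[1])
        original = W[i, j]
        W[i, j] = original + eps
        loss_plus = cross_entropy_loss(forward_pass(network, X)[f'A{L}'], Y)
        W[i, j] = original - eps
        loss_minus = cross_entropy_loss(forward_pass(network, X)[f'A{L}'], Y)
        W[i, j] = original  # restore the weight
        numeric = (loss_plus - loss_minus) / (2 * eps)
        print(f"W{l}[{i},{j}]: analytic={grads[f'dW{l}'][i, j]:.6e}, numeric={numeric:.6e}")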
Parameter update
def update_parameters(network, grads, learning_rate):
    """
    Update the parameters with gradient descent.
    Inputs:
        network: network parameters
        grads: gradient dictionary
        learning_rate: learning rate
    Returns:
        network: updated network parameters
    """
    L = len(network) // 2
    for l in range(1, L + 1):
        network[f'W{l}'] -= learning_rate * grads[f'dW{l}']
        network[f'b{l}'] -= learning_rate * grads[f'db{l}']
    return network
Activation functions and their derivatives
def relu(Z):
    """ReLU activation."""
    return np.maximum(0, Z)

def relu_derivative(Z):
    """Derivative of ReLU (1 where Z > 0, else 0)."""
    return (Z > 0).astype(float)

def softmax(Z):
    """Softmax activation (column-wise, numerically stabilized)."""
    expZ = np.exp(Z - np.max(Z, axis=0, keepdims=True))
    return expZ / np.sum(expZ, axis=0, keepdims=True)
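A short check of the stabilization trick in softmax: subtracting the column-wise maximum leaves the probabilities unchanged but prevents overflow for large inputs.

Z = np.array([[1000.0, -1000.0],
              [1001.0,  -999.0]])
print(softmax(Z))              # finite probabilities, roughly [[0.27, 0.27], [0.73, 0.73]]
print(softmax(Z).sum(axis=0))  # each column sums to 1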
Training function
def train_bp_network(X_train, Y_train, hidden_layers, learning_rate=0.01, epochs=1000):
    """
    Train the BP neural network.
    Inputs:
        X_train: training data, shape (n_features, n_samples)
        Y_train: training labels (one-hot), shape (n_classes, n_samples)
        hidden_layers: list of hidden layer sizes
        learning_rate: learning rate
        epochs: number of training epochs
    Returns:
        network: the trained network
    """
    # Initialize the network
    input_size = X_train.shape[0]
    output_size = Y_train.shape[0]
    network = initialize_network(input_size, hidden_layers, output_size)
    # Training loop
    for epoch in range(epochs):
        # Forward pass
        cache = forward_pass(network, X_train)
        # Loss
        loss = cross_entropy_loss(cache[f'A{len(hidden_layers) + 1}'], Y_train)
        # Backward pass
        grads = backward_pass(network, cache, X_train, Y_train)
        # Parameter update
        network = update_parameters(network, grads, learning_rate)
        # Report the loss periodically
        if epoch % 100 == 0:
            print(f"Epoch {epoch}, Loss: {loss}")
    return network
Loss function
def cross_entropy_loss(AL, Y):
    """Cross-entropy loss, averaged over the samples."""
    m = Y.shape[1]
    loss = -np.sum(Y * np.log(AL + 1e-8)) / m
    return loss
Prediction function
def predict(network, X):
    """
    Predict with a trained network.
    Inputs:
        network: the trained network
        X: input data
    Returns:
        predictions: predicted class index for each sample
    """
    cache = forward_pass(network, X)
    L = len(network) // 2
    predictions = np.argmax(cache[f'A{L}'], axis=0)
    return predictions
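Putting everything together, a minimal end-to-end sketch on synthetic data; the dataset, layer sizes, and hyperparameters below are hypothetical and chosen only to illustrate the calling convention (features and one-hot labels are stored column-wise, one sample per column).

np.random.seed(0)
n_features, n_classes, n_samples = 4, 3, 300

# Synthetic, learnable data: the label is the index of the largest of the first 3 features
X_train = np.random.randn(n_features, n_samples)
labels = np.argmax(X_train[:n_classes, :], axis=0)
Y_train = np.zeros((n_classes, n_samples))
Y_train[labels, np.arange(n_samples)] = 1  # one-hot encode

network = train_bp_network(X_train, Y_train, hidden_layers=[8, 8],
                           learning_rate=0.1, epochs=1000)
predictions = predict(network, X_train)
print(f"Training accuracy: {np.mean(predictions == labels):.2f}")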
