# 定义神经网络结构

$y=0.4x^2 + 0.3x\sin(15x) + 0.01\cos(50x) - 0.3$

## 权重矩阵W1/B1

W1的尺寸是128x1，B1的尺寸是128x1。

# 创造训练数据

import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path

def TargetFunction(x):
    """Ground-truth curve: y = 0.4*x^2 + 0.3*x*sin(15x) + 0.01*cos(50x) - 0.3.

    Works elementwise on scalars or numpy arrays.
    """
    quadratic = 0.4 * (x ** 2)
    modulated = 0.3 * x * np.sin(15 * x)
    ripple = 0.01 * np.cos(50 * x)
    return quadratic + modulated + ripple - 0.3

def CreateSampleDataXY(m):
    """Draw m random x values in [0, 1) and pair each with its target y.

    Returns an (m, 2) array: column 0 is x, column 1 is TargetFunction(x).
    """
    samples = np.random.random((m, 2))
    samples[:, 1] = TargetFunction(samples[:, 0])
    return samples

def CreateTestData(n):
    """Build an evaluation grid of n points on [0, 1].

    Returns (TX, TY, TZ): inputs, true targets, and a zero-filled array to
    receive the network's predictions.

    Bug fix: the grid size was hard-coded to 100 instead of using n, so any
    call with n != 100 produced TX/TY of length 100 but TZ of length n.
    """
    TX = np.linspace(0, 1, n)
    TY = TargetFunction(TX)
    TZ = np.zeros(n)
    return TX, TY, TZ

$\begin{pmatrix} x_1, y_1\\ x_2, y_2\\ \dots\\ x_m, y_m\\ \end{pmatrix}$

# 定义前向计算过程

$Z1=W1*X+B1$
$A1=sigmoid(Z1)$
$Z2=W2*A1+B2$
$A2=Z2 \tag{这一步可以省略}$

def ForwardCalculation(x, dictWeights):
    """Forward pass of the 1-hidden-layer net: Z1 = W1*x + B1, A1 = sigmoid(Z1),
    A2 = Z2 = W2*A1 + B2 (identity output activation).

    Returns the output A2 and a cache of activations for backpropagation.
    """
    W1, B1 = dictWeights["W1"], dictWeights["B1"]
    W2, B2 = dictWeights["W2"], dictWeights["B2"]

    Z1 = np.dot(W1, x) + B1
    A1 = sigmoid(Z1)
    Z2 = np.dot(W2, A1) + B2
    A2 = Z2  # output layer is linear, so this step could be omitted

    return A2, {"A1": A1, "A2": A2}

# 定义代价函数

$Loss = \frac{1}{2}(Z - Y) ^ 2$

# 定义针对w和b的梯度函数

## 求W2的梯度

$Z2 = W2*A1+B2$

$Loss = \frac{1}{2}(Z2-Y)^2$

$\frac{\partial{Loss}}{\partial{W2}} = \frac{\partial{Loss}}{\partial{Z2}}*\frac{\partial{Z2}}{\partial{W2}}$

$\frac{\partial{Loss}}{\partial{Z2}} = \frac{\partial{}}{\partial{Z2}}[\frac{(Z2-Y)^2}{2}] = Z2-Y$

$\frac{\partial{Z2}}{\partial{W2}} = \frac{\partial{}}{\partial{W2}}(W2*A1+B2) = A1^T$

$\frac{\partial{Loss}}{\partial{W2}} = \frac{\partial{Loss}}{\partial{Z2}}*\frac{\partial{Z2}}{\partial{W2}} = (Z2-Y)*A1^T$

## 求B2的梯度

$\frac{\partial{Loss}}{\partial{B2}} = \frac{\partial{Loss}}{\partial{Z2}}*\frac{\partial{Z2}}{\partial{B2}}$

$\frac{\partial{Z2}}{\partial{B2}} = \frac{\partial{(W2*A1+B2)}}{\partial{B2}} = 1$

$\frac{\partial{Loss}}{\partial{B2}} = \frac{\partial{Loss}}{\partial{Z2}}*\frac{\partial{Z2}}{\partial{B2}} = Z2-Y$

## 求W1的梯度

$A1 = sigmoid(Z1)$

$Z1 = W1*X+B1$

$\frac{\partial{Loss}}{\partial{Z1}} = \frac{\partial{Loss}}{\partial{Z2}}*\frac{\partial{Z2}}{\partial{A1}}*\frac{\partial{A1}}{\partial{Z1}}$

$\frac{\partial{Loss}}{\partial{Z2}} = Z2-Y = dZ2$

$\frac{\partial{Z2}}{\partial{A1}} = \frac{\partial{}}{\partial{A1}}(W2*A1+B2) = W2^T$

$\frac{\partial{A1}}{\partial{Z1}} = \frac{\partial{}}{\partial{Z1}}(sigmoid(Z1)) = A1*(1-A1)$

$\frac{\partial{Loss}}{\partial{Z1}} = W2^T * dZ2 * A1 * (1-A1) = dZ1$

$\frac{\partial{Loss}}{\partial{W1}} = \frac{\partial{Loss}}{\partial{Z1}}*\frac{\partial{Z1}}{\partial{W1}}=dZ1*\frac{\partial{(W1*X+B1)}}{\partial{W1}}=dZ1*X^T$

$\frac{\partial{Loss}}{\partial{B1}} = \frac{\partial{Loss}}{\partial{Z1}}*\frac{\partial{Z1}}{\partial{B1}}=dZ1*\frac{\partial{(W1*X+B1)}}{\partial{B1}}=dZ1$

def BackPropagation(x, y, dictCache, dictWeights):
    """Per-sample backward pass for the two-layer network.

    Computes the gradients of Loss = (A2 - y)^2 / 2 with respect to
    W1, B1, W2, B2. x and y are one training sample (scalars here);
    dictCache holds A1/A2 from the forward pass; dictWeights supplies W2
    for the hidden-layer chain rule.
    """
    A1 = dictCache["A1"]
    A2 = dictCache["A2"]
    W2 = dictWeights["W2"]

    # dLoss/dZ2 = Z2 - y; A2 == Z2 because the output activation is identity
    dLoss_Z2 = A2 - y
    dZ2 = dLoss_Z2
    dW2 = dZ2 * A1.T      # dLoss/dW2 = dZ2 * A1^T
    dB2 = dZ2             # dLoss/dB2 = dZ2

    dZ2_A1 = W2.T * dZ2   # back through the output layer: W2^T * dZ2
    dA1_Z1 = A1 * (1 - A1)  # sigmoid'(Z1) written in terms of A1
    # dZ1 is dLoss_Z1
    dZ1 = dZ2_A1 * dA1_Z1
    dW1 = dZ1 * x         # dLoss/dW1 = dZ1 * x (x is a scalar sample here)
    dB1 = dZ1

    dictGrads = {"dW1":dW1, "dB1":dB1, "dW2":dW2, "dB2":dB2}
    return dictGrads

# 每次迭代后更新w,b的值

def UpdateWeights(dictWeights, dictGrads, learningRate):
    """One gradient-descent step: W <- W - learningRate * dW for each parameter.

    Returns a new weight dict; the input dicts are not mutated.

    Bug fix: the original referenced bare names dW1/dB1/dW2/dB2 that were
    never defined in this scope (a NameError at runtime) — the gradients
    must be read from dictGrads.
    """
    W1 = dictWeights["W1"] - learningRate * dictGrads["dW1"]
    B1 = dictWeights["B1"] - learningRate * dictGrads["dB1"]
    W2 = dictWeights["W2"] - learningRate * dictGrads["dW2"]
    B2 = dictWeights["B2"] - learningRate * dictGrads["dB2"]

    return {"W1": W1, "B1": B1, "W2": W2, "B2": B2}

# 帮助函数

def sigmoid(x):
    """Logistic function 1 / (1 + e^(-x)); elementwise on numpy arrays."""
    return 1 / (1 + np.exp(-x))

def initialize_with_zeros(n_x, n_h, n_y):
    """Initialize parameters for an n_x -> n_h -> n_y network.

    Weights are drawn uniformly from +-sqrt(6)/sqrt(fan_in + fan_out)
    (Xavier/Glorot-style range); biases start at zero. The RNG is seeded
    so results are reproducible across runs.
    """
    np.random.seed(2)

    bound1 = np.sqrt(6) / np.sqrt(n_x + n_h)
    W1 = np.random.uniform(-bound1, bound1, size=(n_h, n_x))
    B1 = np.zeros((n_h, 1))

    bound2 = np.sqrt(6) / np.sqrt(n_y + n_h)
    W2 = np.random.uniform(-bound2, bound2, size=(n_y, n_h))
    B2 = np.zeros((n_y, 1))

    assert (W1.shape == (n_h, n_x))
    assert (B1.shape == (n_h, 1))
    assert (W2.shape == (n_y, n_h))
    assert (B2.shape == (n_y, 1))

    return {"W1": W1, "B1": B1, "W2": W2, "B2": B2}

# 主程序初始化

# --- hyper-parameters and training-set construction ---
m = 1000                                  # number of training samples
S = CreateSampleDataXY(m)                 # S[:,0] = x, S[:,1] = TargetFunction(x)
#plt.scatter(S[:,0], S[:,1], 1)
#plt.show()
n_input, n_hidden, n_output = 1, 128, 1   # network layout: 1 -> 128 -> 1
learning_rate = 0.1
eps = 1e-10                               # loss-change threshold for early stopping
dictWeights = initialize_with_zeros(n_input, n_hidden, n_output)
max_iteration = 1000
loss, prev_loss, diff_loss = 0, 0, 0      # loss bookkeeping across epochs

# 程序主循环

# Training loop over max_iteration epochs of per-sample SGD.
# Bug fix: the original only ran the forward pass and never called
# BackPropagation/UpdateWeights, so the network never learned and the
# loss/prev_loss/diff_loss/eps variables defined above were dead.
for iteration in range(max_iteration):
    for i in range(m):
        x = S[i, 0]
        y = S[i, 1]
        A2, dictCache = ForwardCalculation(x, dictWeights)
        dictGrads = BackPropagation(x, y, dictCache, dictWeights)
        dictWeights = UpdateWeights(dictWeights, dictGrads, learning_rate)
        loss = 0.5 * (A2 - y) ** 2   # Loss = (Z2 - Y)^2 / 2, per the derivation above
    # early stop once the end-of-epoch loss stops changing
    diff_loss = abs(loss - prev_loss)
    prev_loss = loss
    if diff_loss < eps:
        break
    print("iteration", iteration)

# 测试并输出拟合结果

# Evaluate the trained network on an even grid and plot fit vs. target.
tm = 100
TX, TY, TZ = CreateTestData(tm)
for i in range(tm):
    # Bug fix: the original bound the loop result to `dict`, shadowing the
    # builtin; also removed the unused `correctCount` and `y` locals.
    a2, _cache = ForwardCalculation(TX[i], dictWeights)
    TZ[i] = a2

plt.scatter(TX, TY)
plt.plot(TX, TZ, 'r')
# Bug fix: `str = str.format(...)` shadowed the builtin `str` type; use a
# normal bound format call on the template string instead.
title = "cell:{0} sample:{1} iteration:{2} rate:{3}".format(
    n_hidden, m, max_iteration, learning_rate)
plt.title(title)
plt.show()

# 参数调整

1. 神经元数=128
2. 输入训练数据量=1000
3. 迭代次数=1000
4. 权重调整步进值=0.1

## 步长值的变化（标准值0.1）

0 128 1000 1000 0.1 5
1 64 1000 2000 0.1 3
2 96 1000 1000 0.1 2.5
3 256 1000 500 0.1 0
4 128 500 1000 0.1 2
5 128 1500 1000 0.1 5
6 128 1000 500 0.1 2.5
7 128 1000 1500 0.1 5
8 128 1000 1000 0.5 1
9 128 1000 1000 0.05 2

posted @ 2018-11-28 12:48 UniversalAIPlatform 阅读(...) 评论(...) 编辑 收藏