作业 03 - 深层神经网络

深层神经网络

最重要的是要让整个过程向量化以加快运行速度,这里给出矩阵的大小.    

假设一共有 $\mathrm{L}$ 层,$\mathrm{m}$ 个训练样本,每一层的节点数量为 $\mathrm{n[l]}$        

$\mathrm{W[l] = (n[l], n[l - 1])}$, $\mathrm{b[l] = (n[l], 1)}$, $\mathrm{Z[l] = A[l] = (n[l], m)}$     

正向传播公式

$\mathrm{Z[l] = W[l]A[l - 1] + b[l]}$, $\mathrm{A[l] = g(Z[l])}$

反向传播公式

注:$\mathrm{Cost}$ 对 $\mathrm{A[L]}$ 求导时没有算进 $\mathrm{\frac{1}{m}}$,

这个除法留到后面计算 $\mathrm{d(W[l])}$ 和 $\mathrm{d(b[l])}$ 时再进行.   

$\mathrm{d(Z[l])=d(A[l]) * g[l]'(Z[l])}$

$\mathrm{d(W[l]) = \frac{1}{m} d(Z[l]) A[l - 1] ^{T}}$ 

$\mathrm{d(b[l]) = \frac{1}{m} np.sum(d(Z[l]), axis=1, keepdims=True)}$ 

$\mathrm{d(A[l - 1]) = W[l] ^T d(Z[l])}$ 

代码

这份代码里封装了 $\mathrm{ANNs}$ 函数,只需要输入每一层的节点数量,激活函数类型,程序就会根据训练数据学习.  

最佳的参数需要自己一步一步去调,同时学习次数不宜过多,以防止过拟合.  

学习效果:

 

调用代码:

from inspect import Parameter
import numpy as np
import h5py
import matplotlib.pyplot as plt
from lr_utils import load_dataset   
from beta import ANNs, calculate  
# Load the data through the project helper; it returns five values in this order.
train_x, train_y, test_x, test_y, classes = load_dataset() 
# Flatten each sample (axis 0 is kept), transpose so samples become columns,
# then divide by 255 (presumably 8-bit pixel values scaled to [0, 1] - confirm).
train_x = train_x.reshape(train_x.shape[0], -1).T / 255
test_x  = test_x.reshape(test_x.shape[0], -1).T   / 255

# Short aliases for the training inputs and labels.
X , Y = train_x, train_y

# One activation name per entry of `layer`. Index 0 belongs to the input layer;
# "ng" looks like a placeholder that is never applied - verify against ANNs.
acti = ["ng", "relu",  "relu", "relu", "sigmoid"]   

# Node count per layer: input size (rows of X), three hidden layers, one output unit.
layer = [X.shape[0], 20, 20, 5, 1]      

# The last two positional arguments are presumably `steps` and `lr`
# (assuming beta.ANNs has the signature of the ANNs listing in this post).
ANNs(X, Y, test_x, test_y, layer, acti, 10000, 0.0075)   

  

$\mathrm{ANNs}$ 代码

import numpy as np
import matplotlib.pyplot as plt
import h5py 

def sigmoid(x):
    """Return the logistic sigmoid ``1 / (1 + exp(-x))`` element-wise.

    The value is computed as ``exp(-log(1 + exp(-x)))`` using
    ``np.logaddexp(0, -x)`` for the inner logarithm.  This avoids evaluating
    ``exp(-x)`` directly, which overflows to ``inf`` (and emits a
    RuntimeWarning) for large negative ``x``.

    Args:
        x: Scalar or NumPy array of pre-activations.

    Returns:
        Sigmoid of ``x`` with the same shape as ``x``.
    """
    # log(1 + exp(-x)) evaluated without overflow for any finite x.
    softplus_neg = np.logaddexp(0.0, -x)
    return np.exp(-softplus_neg)

def dsigmoid(y):
    """Return the sigmoid derivative expressed through its output.

    Args:
        y: The sigmoid output ``sigmoid(z)``, not the pre-activation ``z``.

    Returns:
        ``y * (1 - y)`` element-wise.
    """
    complement = 1 - y
    return y * complement

def relu(x):
    """Return the rectified linear unit ``max(0, x)`` element-wise."""
    floor = 0.0
    return np.maximum(floor, x)

def drelu(y):
    """Return the ReLU derivative as an integer mask.

    Args:
        y: NumPy array; entries strictly greater than zero map to 1,
            all others to 0.

    Returns:
        Integer array of the same shape as ``y``.
    """
    is_positive = y > 0
    return is_positive.astype('int')

def tanh(x):
    """Return the hyperbolic tangent of ``x`` element-wise.

    Delegates to ``np.tanh``.  The explicit formula
    ``(e^x - e^-x) / (e^x + e^-x)`` overflows for large ``|x|`` and evaluates
    to ``inf / inf = nan``; ``np.tanh`` saturates to +/-1 there instead.

    Args:
        x: Scalar or NumPy array of pre-activations.

    Returns:
        ``tanh(x)`` with the same shape as ``x``.
    """
    return np.tanh(x)

def dtanh(y):
    """Return the tanh derivative expressed through its output.

    Args:
        y: The tanh output ``tanh(z)``, not the pre-activation ``z``.

    Returns:
        ``1 - y**2`` element-wise.
    """
    squared = np.power(y, 2)
    return 1 - squared

# Randomly initialize the weights W and biases b.

def initialize(layer_dims):
    """Create randomly initialized weights and biases for every layer.

    The global NumPy RNG is reseeded with 2 on each call, so the returned
    values are the same for the same ``layer_dims``.

    Args:
        layer_dims: Sequence of node counts per layer; index 0 is the input
            size.

    Returns:
        Dict with keys ``"W1".."W{L-1}"`` and ``"b1".."b{L-1}"``.  ``W{l}``
        has shape ``(layer_dims[l], layer_dims[l - 1])`` and is standard
        normal noise divided by ``sqrt(layer_dims[l - 1])``; ``b{l}`` has
        shape ``(layer_dims[l], 1)`` and is standard normal noise times 0.01.
    """
    np.random.seed(2)
    params = {}
    size_pairs = zip(layer_dims, layer_dims[1:])
    for idx, (fan_in, fan_out) in enumerate(size_pairs, start=1):
        # Draw W before b so the random stream is consumed in the same order.
        raw_weights = np.random.randn(fan_out, fan_in)
        params[f"W{idx}"] = raw_weights / np.sqrt(fan_in)
        params[f"b{idx}"] = np.random.randn(fan_out, 1) * 0.01
    return params

# Forward-propagation functions.

def linear_forward(A_prev, W, b):
    """Compute the linear part of a layer, ``z = W . A_prev + b``.

    Args:
        A_prev: Activations fed into this layer.
        W: Weight matrix of this layer.
        b: Bias of this layer (added to the matrix product with NumPy
            broadcasting).

    Returns:
        A tuple ``(z, cache)`` where ``cache`` is ``(A_prev, W, b)``:
        ``cache[0]`` is ``A_prev``, ``cache[1]`` is ``W`` and ``cache[2]``
        is ``b``.  The objects are stored by reference, not copied.
    """
    cache = (A_prev, W, b)
    pre_activation = np.dot(W, A_prev) + b
    return pre_activation, cache

def forward(A_prev, W, b, activation_type):
    """Run one layer forward: linear step followed by the activation.

    Args:
        A_prev: Activations fed into this layer.
        W: Weight matrix of this layer.
        b: Bias of this layer.
        activation_type: One of ``"sigmoid"``, ``"relu"`` or ``"tanh"``.

    Returns:
        A tuple ``(A_next, delta, z, linear_cache)``:
        ``A_next`` is the activation ``g(z)``; ``delta`` is the activation
        derivative ``g'(z)``, computed from ``A_next``; ``z`` is the
        pre-activation; ``linear_cache`` is the ``(A_prev, W, b)`` tuple
        returned by ``linear_forward``.

    Raises:
        ValueError: If ``activation_type`` is not a supported name.
    """
    # Fail early with a clear message; an unknown name used to fall through
    # every branch and crash on the unbound result variables.
    if activation_type not in ("sigmoid", "relu", "tanh"):
        raise ValueError(
            "unsupported activation_type: " + repr(activation_type)
            + " (expected 'sigmoid', 'relu' or 'tanh')"
        )

    z, linear_cache = linear_forward(A_prev, W, b)
    if activation_type == "sigmoid":
        A_next = sigmoid(z)
        delta = dsigmoid(A_next)
    elif activation_type == "relu":
        A_next = relu(z)
        delta = drelu(A_next)
    else:  # "tanh"
        A_next = tanh(z)
        delta = dtanh(A_next)
    return A_next, delta, z, linear_cache

def backward(dAL, delta, z, linear_cashe):
    """Run one layer backward and return its gradients.

    Args:
        dAL: Gradient of the cost with respect to this layer's activation.
        delta: Activation derivative ``g'(z)`` of this layer.
        z: Pre-activation of this layer (not used in the computation).
        linear_cashe: The ``(A_prev, W, b)`` tuple saved by the forward pass.

    Returns:
        A tuple ``(dZ, dW, db, dA_prev)``.  ``dW`` and ``db`` are scaled by
        ``1 / m`` where ``m = dAL.shape[1]``; ``dZ`` and ``dA_prev`` are not.
    """
    prev_activation, weights = linear_cashe[0], linear_cashe[1]
    scale = 1 / dAL.shape[1]

    grad_z = dAL * delta
    grad_w = scale * np.dot(grad_z, prev_activation.T)
    grad_b = scale * np.sum(grad_z, axis=1, keepdims=True)
    grad_a_prev = np.dot(weights.T, grad_z)
    return grad_z, grad_w, grad_b, grad_a_prev

def calculate(X, parameters, layer_type):
    """Run a forward pass through the whole network and return its output.

    Args:
        X: Network input.
        parameters: Dict holding ``"W{i}"`` and ``"b{i}"`` for
            ``i = 1 .. len(layer_type) - 1``.
        layer_type: Activation name per layer; index 0 is never read.
            Names other than ``"sigmoid"``, ``"relu"`` and ``"tanh"`` leave
            the linear output unchanged.

    Returns:
        The activation of the last layer (``X`` itself when there is only
        one entry in ``layer_type``).
    """
    activation = X
    for idx in range(1, len(layer_type)):
        kind = layer_type[idx]
        activation = np.dot(parameters[f"W{idx}"], activation) + parameters[f"b{idx}"]
        if kind == "sigmoid":
            activation = sigmoid(activation)
        elif kind == "relu":
            activation = relu(activation)
        elif kind == "tanh":
            activation = tanh(activation)
    return activation

# Takes both training and test data so the learning progress can be monitored.

def ANNs(X, Y, tx, ty, layer_dims, acti_type, steps=1000, lr=0.09):
    """Train a fully connected network with full-batch gradient descent.

    Each step runs a forward pass over all of ``X``, back-propagates the
    gradient of the binary cross-entropy cost, and updates every layer's
    parameters in place.  Every 20 steps the training and test accuracy
    (predictions thresholded at 0.5) are printed.

    Args:
        X: Training inputs, one sample per column.
        Y: Training labels; row 0 is compared against the predictions.
        tx: Test inputs, one sample per column.
        ty: Test labels; row 0 is compared against the predictions.
        layer_dims: Node count per layer; index 0 is the input size.
        acti_type: Activation name per layer; index 0 is never read.
        steps: Number of gradient-descent iterations.
        lr: Learning rate.

    Returns:
        The trained parameter dict (``"W{l}"`` / ``"b{l}"`` entries).
    """
    parameters = initialize(layer_dims)
    num_layers = len(layer_dims)

    for step in range(steps):
        # Forward pass. Slot 0 is an empty placeholder so that index l
        # lines up with layer l.
        layer_cache = [()]
        activation = X
        for l in range(1, num_layers):
            w_key, b_key = "W" + str(l), "b" + str(l)
            A_next, delta, z, linear_cache = forward(
                activation, parameters[w_key], parameters[b_key], acti_type[l]
            )
            layer_cache.append((A_next, delta, z, linear_cache))
            activation = A_next

        # Gradient of the cross-entropy cost w.r.t. the output activation.
        # The 1/m factor is applied later inside backward(); 1e-9 guards
        # against division by zero.
        positive_term = Y / (activation + 1e-9)
        negative_term = (1 - Y) / (1 - activation + 1e-9)
        grad_A = -positive_term + negative_term

        # Backward pass, last layer first. backward() reads W before the
        # in-place update below touches it.
        for l in reversed(range(1, num_layers)):
            _, delta, z, linear_cache = layer_cache[l]
            _, grad_W, grad_b, grad_A_prev = backward(grad_A, delta, z, linear_cache)
            parameters["W" + str(l)] -= lr * grad_W
            parameters["b" + str(l)] -= lr * grad_b
            grad_A = grad_A_prev

        # Report accuracy every 20 steps.
        if step % 20 == 0:
            train_pred = (calculate(X, parameters, acti_type) >= 0.5).astype('int')
            train_hits = sum(
                1 for j in range(Y.shape[1]) if train_pred[0][j] == Y[0][j]
            )
            test_pred = (calculate(tx, parameters, acti_type) >= 0.5).astype('int')
            test_hits = sum(
                1 for j in range(ty.shape[1]) if test_pred[0][j] == ty[0][j]
            )
            print("训练次数 " + str(step + 1) + ": ")
            print("        训练准确率: " + str(train_hits / Y.shape[1] * 100) + "%")
            print("        测试准确率: " + str(test_hits / ty.shape[1] * 100) + "%")
            print("")

    return parameters

  

posted @ 2022-03-04 20:03  guangheli  阅读(48)  评论(0)    收藏  举报