# 搭建神经网络


'''
Author: huajia
Date: 2021-11-26 09:42:03
LastEditors: huajia
LastEditTime: 2021-12-01 17:30:58
Description: 略略略
'''


import numpy as np
import matplotlib.pyplot as plt


def _generate_planar_dataset(seed):
    """Generate the two-class "flower" dataset for a given RNG seed.

    Shared implementation behind load_planar_dataset() and
    load_test_planar_dataset(), which previously duplicated this code and
    differed only in the seed.

    Side effect: reseeds NumPy's *global* random generator with ``seed``,
    so random draws made afterwards are deterministic as well.

    Args:
        seed: value passed to np.random.seed before sampling.

    Returns:
        (X, Y): X is a float array of shape (2, 400) holding one example
        per column; Y is a uint8 array of shape (1, 400) with label 0 for
        the first 200 columns and 1 for the last 200.
    """
    np.random.seed(seed)
    m = 400  # number of examples
    N = m // 2  # number of points per class
    D = 2  # dimensionality
    X = np.zeros((m, D))  # one example per row while the data is built
    Y = np.zeros((m, 1), dtype='uint8')  # labels (0 for red, 1 for blue)
    a = 4  # maximum ray of the flower
    for j in range(2):
        rows = slice(N * j, N * (j + 1))
        # Angle: class j sweeps [j*3.12, (j+1)*3.12] plus Gaussian noise
        # (3.12 is presumably an approximation of pi).
        # The order of the two randn draws must stay as is, otherwise the
        # generated points change for a fixed seed.
        t = np.linspace(j * 3.12, (j + 1) * 3.12, N) + \
            np.random.randn(N) * 0.2
        r = a * np.sin(4 * t) + np.random.randn(N) * 0.2  # radius
        X[rows] = np.c_[r * np.sin(t), r * np.cos(t)]
        Y[rows] = j
    # Callers expect one example per column.
    return X.T, Y.T


def load_planar_dataset():
    """Return the training "flower" dataset (global RNG seeded with 1).

    Returns:
        (X, Y) with shapes (2, 400) and (1, 400); see
        _generate_planar_dataset for details.
    """
    return _generate_planar_dataset(1)


def load_test_planar_dataset():
    """Return the test "flower" dataset (global RNG seeded with 2).

    Returns:
        (X, Y) with shapes (2, 400) and (1, 400); see
        _generate_planar_dataset for details.
    """
    return _generate_planar_dataset(2)


def initialize_parameters(n_x, n_h, n_y):
    """Create the initial weights and biases of a one-hidden-layer network.

    Args:
        n_x: number of input units.
        n_h: number of hidden units.
        n_y: number of output units.

    Returns:
        dict with keys
            "W1": weight matrix of shape (n_h, n_x)
            "b1": zero bias vector of shape (n_h, 1)
            "W2": weight matrix of shape (n_y, n_h)
            "b2": zero bias vector of shape (n_y, 1)
    """
    weight_scale = 0.01
    # NOTE(review): np.random.rand samples uniformly from [0, 1), so every
    # initial weight is non-negative; confirm that randn was not intended.
    # W1 is drawn before W2 so results stay reproducible for a fixed seed.
    parameters = {
        "W1": np.random.rand(n_h, n_x) * weight_scale,
        "b1": np.zeros(shape=(n_h, 1)),
        "W2": np.random.rand(n_y, n_h) * weight_scale,
        "b2": np.zeros(shape=(n_y, 1)),
    }

    # Sanity-check that every array has the documented shape.
    expected_shapes = {
        "W1": (n_h, n_x),
        "b1": (n_h, 1),
        "W2": (n_y, n_h),
        "b2": (n_y, 1),
    }
    for key, shape in expected_shapes.items():
        assert parameters[key].shape == shape

    return parameters


def sigmoid(z):
    """Logistic sigmoid 1 / (1 + exp(-z)), evaluated without overflow.

    The direct formula computes exp(-z), which overflows (RuntimeWarning)
    for large negative z. Here only exp(-|z|) is evaluated, which lies in
    (0, 1], and the algebraically equivalent form exp(z) / (1 + exp(z)) is
    used for negative inputs.

    Args:
        z: scalar or array-like of real numbers. Non-floating input is
            converted to float.

    Returns:
        Element-wise sigmoid of z: an array with the shape of z, or a NumPy
        scalar when z is a scalar.
    """
    z = np.asarray(z)
    if not np.issubdtype(z.dtype, np.floating):
        z = z.astype(float)
    decay = np.exp(-np.abs(z))  # always in (0, 1], never overflows
    a = np.where(z >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
    # Indexing with () turns a 0-d result back into a scalar and leaves
    # arrays of higher rank unchanged.
    return a[()]


def relu(z):
    """Rectified linear unit: element-wise maximum of z and 0.

    Args:
        z: scalar or array of pre-activation values.

    Returns:
        z with every negative entry replaced by 0.
    """
    return np.maximum(z, 0)


def cost_fun(Y, A, m):
    """Binary cross-entropy cost, summed over all entries and divided by m.

    Args:
        Y: array of target labels.
        A: array of predicted probabilities, broadcastable against Y.
        m: divisor applied to the summed loss (the caller passes the number
            of examples).

    Returns:
        The cost as a Python float.
    """
    # Small offset that keeps log() finite when A is exactly 0 or 1.
    eps = 1e-10
    log_likelihood = Y * np.log(A + eps) + (1 - Y) * np.log(1 - A + eps)
    total = np.sum(log_likelihood)
    mean_loss = -total / m
    return float(np.squeeze(mean_loss))


def forward_propagation(X, parameters):
    """Run the forward pass: tanh hidden layer followed by a sigmoid output.

    Args:
        X: input array with one example per column.
        parameters: dict providing "W1", "b1", "W2" and "b2".

    Returns:
        (A2, cache): A2 is the output activation of shape (1, X.shape[1]);
        cache is a dict with the intermediate values "Z1", "A1", "Z2", "A2".
    """
    W1, b1, W2, b2 = (parameters[key] for key in ("W1", "b1", "W2", "b2"))

    # Hidden layer.
    hidden_pre = np.dot(W1, X) + b1
    hidden_act = np.tanh(hidden_pre)

    # Output layer.
    output_pre = np.dot(W2, hidden_act) + b2
    output_act = sigmoid(output_pre)

    # The network is expected to have exactly one output unit.
    assert output_act.shape == (1, X.shape[1])

    cache = {
        "Z1": hidden_pre,
        "A1": hidden_act,
        "Z2": output_pre,
        "A2": output_act,
    }
    return (output_act, cache)


def backward_propagation(X, Y, cache, parameters):
    """Compute the cost gradients for the tanh-hidden / sigmoid-output net.

    Args:
        X: input array with one example per column.
        Y: target labels, broadcastable against the output activation.
        cache: dict holding the forward-pass activations "A1" and "A2".
        parameters: dict holding the output-layer weights "W2".

    Returns:
        dict with the gradients "dW1", "db1", "dW2" and "db2", each averaged
        over the X.shape[1] examples.
    """
    n_examples = X.shape[1]
    inv_m = 1 / n_examples

    out_weights = parameters["W2"]
    hidden_act = cache["A1"]
    output_act = cache["A2"]

    # Output-layer error.
    delta_out = output_act - Y
    # Back-propagate through tanh, whose derivative is 1 - A1**2.
    tanh_slope = 1 - np.power(hidden_act, 2)
    delta_hidden = np.dot(out_weights.T, delta_out) * tanh_slope

    return {
        "dW1": inv_m * np.dot(delta_hidden, X.T),
        "db1": inv_m * np.sum(delta_hidden, axis=1, keepdims=True),
        "dW2": inv_m * np.dot(delta_out, hidden_act.T),
        "db2": inv_m * np.sum(delta_out, axis=1, keepdims=True),
    }


def optimize(parameters, grads, learning_rate):
    """Apply one gradient-descent step to every parameter.

    Args:
        parameters: dict with "W1", "b1", "W2" and "b2".
        grads: dict with the matching gradients "dW1", "db1", "dW2", "db2".
        learning_rate: step size multiplied into each gradient.

    Returns:
        A new dict with the updated "W1", "b1", "W2" and "b2"; the input
        dict is not modified.
    """
    # The gradient of parameter <name> is stored under "d<name>".
    return {
        name: parameters[name] - learning_rate * grads["d" + name]
        for name in ("W1", "b1", "W2", "b2")
    }


def predict(X, parameters):
    """Return the network output for X.

    The result is the raw output activation produced by
    forward_propagation; no rounding or thresholding is applied here.

    Args:
        X: input array with one example per column.
        parameters: dict with "W1", "b1", "W2" and "b2".

    Returns:
        The output activation A2.
    """
    # forward_propagation returns (A2, cache); only A2 is needed.
    return forward_propagation(X, parameters)[0]


def plot_decision_boundary(model, X, y):
    """Draw the model output over a dense grid together with the data points.

    Args:
        model: callable that receives an array with one (x1, x2) grid point
            per row and returns one value per point.
        X: array whose row 0 holds the x1 coordinates and row 1 the x2
            coordinates of the examples.
        y: labels used to colour the scattered examples.
    """
    padding = 1   # margin added around the data range
    step = 0.01   # spacing between neighbouring grid points

    x1, x2 = X[0, :], X[1, :]
    x1_lo, x1_hi = x1.min() - padding, x1.max() + padding
    x2_lo, x2_hi = x2.min() - padding, x2.max() + padding

    # Build the grid and evaluate the model on every grid point.
    grid_x1, grid_x2 = np.meshgrid(np.arange(x1_lo, x1_hi, step),
                                   np.arange(x2_lo, x2_hi, step))
    grid_points = np.c_[grid_x1.ravel(), grid_x2.ravel()]
    values = model(grid_points).reshape(grid_x1.shape)

    # Filled contour of the model output, with the examples on top.
    plt.contourf(grid_x1, grid_x2, values, cmap=plt.cm.Spectral)
    plt.ylabel('x2')
    plt.xlabel('x1')
    plt.scatter(x1, x2, c=np.squeeze(y), cmap=plt.cm.Spectral)


def _soft_accuracy(Y, predictions):
    """Return the percentage agreement between labels and raw predictions.

    The predictions are used as given (not rounded), so this is the mean of
    Y*p + (1-Y)*(1-p) expressed in percent.

    Args:
        Y: label array of shape (1, m).
        predictions: network outputs of shape (1, m).

    Returns:
        The agreement as a Python float in percent.
    """
    agreement = np.dot(Y, predictions.T) + np.dot(1 - Y, 1 - predictions.T)
    # .item() extracts the single element explicitly; float() on a (1, 1)
    # array is deprecated in recent NumPy releases.
    return (agreement / float(Y.size) * 100).item()


def train_model(X, Y, n_h, num_iterations, learning_rate):
    """Train the one-hidden-layer network with full-batch gradient descent.

    Every 1000 iterations the current cost, accuracy and learning rate are
    printed. After training, the cost curve is plotted, the final training
    accuracy is printed and test_model() is run on the learned parameters.

    Args:
        X: training inputs, one example per column.
        Y: training labels of shape (1, m).
        n_h: number of hidden units.
        num_iterations: number of gradient-descent steps.
        learning_rate: step size used by optimize().

    Returns:
        dict with the learned parameters "W1", "b1", "W2" and "b2".
    """
    n_x = X.shape[0]
    n_y = 1
    m = Y.size
    cost_list = []
    parameters = initialize_parameters(n_x, n_h, n_y)
    for i in range(num_iterations):
        A2, cache = forward_propagation(X, parameters)
        cost = cost_fun(Y, A2, m)
        cost_list.append(cost)
        if i % 1000 == 0:
            # A2 was just computed from the current parameters, so it is
            # reused here instead of running a second forward pass.
            print('第%d轮:' % (i), 'cost:', cost,
                  '准确率: %f' % _soft_accuracy(Y, A2) + '%',
                  '学习效率:', learning_rate)
        grads = backward_propagation(X, Y, cache, parameters)
        parameters = optimize(parameters, grads, learning_rate)

    # Cost recorded at every iteration.
    plt.plot(np.arange(0, num_iterations), cost_list, label="cost")
    plt.legend()
    plt.show()

    predictions = predict(X, parameters)
    print('准确率: %f' % _soft_accuracy(Y, predictions) + '%')
    test_model(parameters)
    return parameters


def test_model(parameters):
    """Evaluate trained parameters on the test dataset and plot the result.

    Loads the test set via load_test_planar_dataset(), prints the accuracy
    of the raw (unrounded) predictions, then shows two figures: the model
    output over the input plane and a scatter plot of the test points.

    Args:
        parameters: dict with the trained "W1", "b1", "W2" and "b2".
    """
    test_X, test_Y = load_test_planar_dataset()
    # Number of test examples. This used to be read from a global `m` that
    # only exists when the file is run as a script.
    m = test_Y.size
    predictions = predict(test_X, parameters)
    agreement = (np.dot(test_Y, predictions.T) +
                 np.dot(1 - test_Y, 1 - predictions.T))
    # .item() extracts the single element explicitly; float() on a (1, 1)
    # array is deprecated in recent NumPy releases.
    accuracy = (agreement / float(m) * 100).item()
    print('测试准确率: %f' % accuracy + '%')
    plt.figure(figsize=(8, 8))
    # plot_decision_boundary passes one grid point per row, while predict
    # expects one example per column, hence the transpose.
    plot_decision_boundary(lambda x: predict(x.T, parameters), test_X, test_Y)
    plt.figure(figsize=(8, 8))
    plt.scatter(test_X[0, :], test_X[1, :], c=np.squeeze(test_Y), s=40, cmap=plt.cm.Spectral)  # scatter plot of the test points
    plt.show()


if __name__ == '__main__':
    X, Y = load_planar_dataset()
    shape_X = X.shape
    shape_Y = Y.shape
    # Number of training examples. Kept as a module-level name because
    # other code in this file may read `m` as a global.
    m = Y.shape[1]

    # Dataset summary. These prints used to sit after exit() and were
    # therefore never executed. Expected output:
    #   X的维度为: (2, 400)
    #   Y的维度为: (1, 400)
    #   数据集里面的数据有:400 个
    print("X的维度为: " + str(shape_X))
    print("Y的维度为: " + str(shape_Y))
    print("数据集里面的数据有:" + str(m) + " 个")

    # 5 hidden units, 500000 iterations, learning rate 10.
    # NOTE(review): a learning rate of 10 is unusually large for gradient
    # descent; confirm it is intentional.
    train_model(X, Y, 5, 500000, 10)

# posted @ 2021-12-01 16:36  花嫁sama  阅读(66)  评论(0)    收藏  举报