Reading Notes on 《深度学习入门-基于Python的理论与实现》 (Deep Learning from Scratch) - 05

ch05

ReLU Layer

class Relu:
    def __init__(self):
        self.mask = None

    def forward(self, x):
        self.mask = (x <= 0)
        out = x.copy()
        out[self.mask] = 0

        return out

    def backward(self, dout):
        dout[self.mask] = 0  # the gradient does not flow where the forward input was <= 0
        dx = dout

        return dx

1. self.mask = (x <= 0)

The variable mask is a NumPy array of True/False values: it stores True wherever an element of the forward-pass input x is less than or equal to 0, and False everywhere else (elements greater than 0).

>>> import numpy as np
>>> x = np.array([[1.0, -0.5], [-2.0, 3.0]])
>>> print(x)
[[ 1.  -0.5]
 [-2.   3. ]]
>>> mask = (x <= 0)
>>> print(mask)
[[False  True]
 [ True False]]

2. out = x.copy()

This makes a copy of the array: out is a new array with the same contents as x, so writing to out in the next step leaves the original input x untouched.
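A quick check (a minimal sketch; the values are just for illustration) that modifying the copy does not affect the original:

>>> x = np.array([1.0, -0.5])
>>> out = x.copy()
>>> out[0] = 99.0
>>> print(x)
[ 1.  -0.5]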

3. out[self.mask] = 0

This sets every element of out where mask is True, i.e. every element that was less than or equal to 0, to 0.

The returned array out therefore keeps the elements greater than 0 unchanged, while all elements less than or equal to 0 become 0.
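Putting it together, a forward and backward pass through the Relu layer looks like this (a minimal sketch; the input and upstream gradient are just for illustration):

import numpy as np

relu = Relu()

x = np.array([[1.0, -0.5], [-2.0, 3.0]])
out = relu.forward(x)
print(out)
# [[1. 0.]
#  [0. 3.]]

# upstream gradient of all ones: it passes through only where x > 0
dout = np.ones_like(out)
dx = relu.backward(dout)
print(dx)
# [[1. 0.]
#  [0. 1.]]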

Sigmoid Layer

class Sigmoid:
    def __init__(self):
        self.out = None

    def forward(self, x):
        out = 1 / (1 + np.exp(-x))
        self.out = out
        return out

    def backward(self, dout):
        # dy/dx = y * (1 - y) for y = sigmoid(x), using the saved forward output
        dx = dout * (1.0 - self.out) * self.out
        return dx
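The backward pass uses the identity dy/dx = y(1 - y) for y = sigmoid(x). A quick numerical check (a minimal sketch; the test point and step size are arbitrary):

import numpy as np

sigmoid = Sigmoid()
x = np.array([0.5])

y = sigmoid.forward(x)
dx = sigmoid.backward(np.array([1.0]))  # analytic gradient: y * (1 - y)

# centered finite difference as a reference
eps = 1e-4
num = (1 / (1 + np.exp(-(x + eps))) - 1 / (1 + np.exp(-(x - eps)))) / (2 * eps)

print(dx, num)  # the two values agree to several decimal places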

Affine Layer (backward pass of np.dot(X, W) + B)

class Affine:
    def __init__(self, W, b):
        self.W = W
        self.b = b

        self.x = None
        self.original_x_shape = None
        # gradients of the weight and bias parameters
        self.dW = None
        self.db = None

    def forward(self, x):
        # flatten tensor inputs (e.g., 4D image data) to 2D
        self.original_x_shape = x.shape
        x = x.reshape(x.shape[0], -1)
        self.x = x

        out = np.dot(self.x, self.W) + self.b

        return out

    def backward(self, dout):
        dx = np.dot(dout, self.W.T)
        self.dW = np.dot(self.x.T, dout)
        self.db = np.sum(dout, axis=0)

        dx = dx.reshape(*self.original_x_shape)  # restore the original input shape (for tensor inputs)
        return dx
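For example (a minimal sketch; the shapes and random initialization are just for illustration), mapping a batch of 2 four-dimensional inputs to 3 outputs:

import numpy as np

np.random.seed(0)
W = np.random.randn(4, 3)
b = np.zeros(3)

affine = Affine(W, b)

x = np.random.randn(2, 4)  # batch of 2, input dimension 4
out = affine.forward(x)
print(out.shape)           # (2, 3)

dout = np.ones_like(out)   # stand-in upstream gradient
dx = affine.backward(dout)
print(dx.shape, affine.dW.shape, affine.db.shape)  # (2, 4) (4, 3) (3,)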

Softmax-with-Loss Layer (backward pass of the softmax() and cross_entropy_error() functions)

class SoftmaxWithLoss:
    def __init__(self):
        self.loss = None
        self.y = None  # output of softmax
        self.t = None  # supervised labels

    def forward(self, x, t):
        self.t = t
        self.y = softmax(x)
        self.loss = cross_entropy_error(self.y, self.t)

        return self.loss

    def backward(self, dout=1):
        batch_size = self.t.shape[0]
        if self.t.size == self.y.size:  # labels given as one-hot vectors
            dx = (self.y - self.t) / batch_size
        else:  # labels given as class indices
            dx = self.y.copy()
            dx[np.arange(batch_size), self.t] -= 1
            dx = dx / batch_size

        return dx
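The class depends on softmax() and cross_entropy_error(), which the book defines in its common/functions.py. A minimal sketch of batch-capable versions (reconstructed here, so treat the details as an approximation of the book's code), followed by a small usage example:

import numpy as np

def softmax(x):
    # subtract the row-wise maximum for numerical stability
    x = x - np.max(x, axis=-1, keepdims=True)
    return np.exp(x) / np.sum(np.exp(x), axis=-1, keepdims=True)

def cross_entropy_error(y, t):
    # promote a single sample to a batch of one
    if y.ndim == 1:
        t = t.reshape(1, -1)
        y = y.reshape(1, -1)

    # if t is one-hot, convert it to class indices
    if t.size == y.size:
        t = t.argmax(axis=1)

    batch_size = y.shape[0]
    return -np.sum(np.log(y[np.arange(batch_size), t] + 1e-7)) / batch_size

layer = SoftmaxWithLoss()
x = np.array([[0.3, 2.9, 4.0]])  # scores for one sample
t = np.array([2])                # correct class index
loss = layer.forward(x, t)
dx = layer.backward()
print(loss, dx)

Note that backward() already divides by batch_size, so dx is the gradient of the average loss over the batch; this is why the clean expression (y - t) / batch_size emerges when softmax and cross-entropy are combined into a single layer.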
