Reading Notes on Deep Learning from Scratch (《深度学习入门——基于Python的理论与实现》), Part 04
Implementing the learning algorithm
1. A class for a two-layer neural network
```python
# coding: utf-8
import sys, os
sys.path.append(os.pardir)  # set up the path so files in the parent directory can be imported
import numpy as np
from common.functions import *
from common.gradient import numerical_gradient


class TwoLayerNet:

    def __init__(self, input_size, hidden_size, output_size, weight_init_std=0.01):
        # Initialize the weights
        self.params = {}
        self.params['W1'] = weight_init_std * np.random.randn(input_size, hidden_size)
        self.params['b1'] = np.zeros(hidden_size)
        self.params['W2'] = weight_init_std * np.random.randn(hidden_size, output_size)
        self.params['b2'] = np.zeros(output_size)

    def predict(self, x):
        W1, W2 = self.params['W1'], self.params['W2']
        b1, b2 = self.params['b1'], self.params['b2']

        a1 = np.dot(x, W1) + b1
        z1 = sigmoid(a1)
        a2 = np.dot(z1, W2) + b2
        y = softmax(a2)

        return y

    # x: input data, t: supervised (label) data
    def loss(self, x, t):
        y = self.predict(x)

        return cross_entropy_error(y, t)

    def accuracy(self, x, t):
        y = self.predict(x)
        y = np.argmax(y, axis=1)
        t = np.argmax(t, axis=1)

        accuracy = np.sum(y == t) / float(x.shape[0])
        return accuracy

    # x: input data, t: supervised (label) data
    def numerical_gradient(self, x, t):
        loss_W = lambda W: self.loss(x, t)

        grads = {}
        grads['W1'] = numerical_gradient(loss_W, self.params['W1'])
        grads['b1'] = numerical_gradient(loss_W, self.params['b1'])
        grads['W2'] = numerical_gradient(loss_W, self.params['W2'])
        grads['b2'] = numerical_gradient(loss_W, self.params['b2'])

        return grads

    def gradient(self, x, t):
        W1, W2 = self.params['W1'], self.params['W2']
        b1, b2 = self.params['b1'], self.params['b2']
        grads = {}

        batch_num = x.shape[0]

        # forward
        a1 = np.dot(x, W1) + b1
        z1 = sigmoid(a1)
        a2 = np.dot(z1, W2) + b2
        y = softmax(a2)

        # backward
        dy = (y - t) / batch_num
        grads['W2'] = np.dot(z1.T, dy)
        grads['b2'] = np.sum(dy, axis=0)

        da1 = np.dot(dy, W2.T)
        dz1 = sigmoid_grad(a1) * da1
        grads['W1'] = np.dot(x.T, dz1)
        grads['b1'] = np.sum(dz1, axis=0)

        return grads
```
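Since the class provides both a numerical gradient and a backprop-style analytic gradient, a natural sanity check is to compare the two on a tiny random batch. The following is a minimal sketch of my own (the sizes and data are made up; it assumes the TwoLayerNet class above is in scope):

```python
import numpy as np

# Tiny network and random batch, purely to check that the two gradient methods agree
net = TwoLayerNet(input_size=4, hidden_size=3, output_size=2)
x = np.random.rand(5, 4)               # 5 samples with 4 features each
t = np.eye(2)[np.random.choice(2, 5)]  # 5 random one-hot labels

grad_numerical = net.numerical_gradient(x, t)  # slow, finite differences
grad_backprop = net.gradient(x, t)             # fast, analytic

for key in ('W1', 'b1', 'W2', 'b2'):
    diff = np.average(np.abs(grad_backprop[key] - grad_numerical[key]))
    print(key + ": " + str(diff))  # each difference should be tiny, e.g. around 1e-8 or less
```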
Key points involved:
1. numpy.random.randn(d0, d1, …, dn)
- randn returns a single sample, or an array of samples, drawn from the standard normal distribution.
- Each dn specifies the size of the corresponding dimension.
- The return value is an array of the specified shape.
```python
>>> np.random.randn(2, 4)
array([[ 0.27795239, -2.57882503,  0.3817649 ,  1.42367345],
       [-1.16724625, -0.22408299,  0.63006614, -0.41714538]])
```
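As a quick check of the standard normal distribution described below, you can look at the sample statistics of a large draw (a small sketch of my own):

```python
import numpy as np

samples = np.random.randn(100000)  # 100,000 standard-normal samples
print(samples.mean())  # close to 0
print(samples.std())   # close to 1
```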
About the standard normal distribution
- The standard normal distribution, also known as the u-distribution, is the normal distribution with mean 0 and standard deviation 1, written N(0, 1).
2. numpy.zeros(shape, dtype=float, order='C')
Returns a new array of zeros with the given shape and dtype (that is, it generates a zero matrix of the corresponding size).
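A short illustration of my own:

```python
import numpy as np

# One-dimensional zero vector, as used for the biases b1 and b2 above
print(np.zeros(3))  # [0. 0. 0.]

# A 2x4 zero matrix with an explicit integer dtype
print(np.zeros((2, 4), dtype=int))
# [[0 0 0 0]
#  [0 0 0 0]]
```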
3. np.argmax(a, axis=1) on a multi-dimensional array
With axis=1 the comparison runs along the column direction; for the 3-D array below, that means comparing down the columns inside each inner matrix.
```python
a = np.array([
    [
        [1, 5, 5, 2],
        [9, -6, 2, 8],
        [-3, 7, -9, 1]
    ],
    [
        [-1, 7, -5, 2],
        [9, 6, 2, 8],
        [3, 7, 9, 1]
    ],
    [
        [21, 6, -5, 2],
        [9, 36, 2, 8],
        [3, 7, 79, 1]
    ]
])
c = np.argmax(a, axis=1)  # a is three-dimensional, so it has three axes: 0, 1 and 2
# With axis=1 the maxima are taken along axis 1, i.e. down the columns
# inside each inner matrix.
# (1) Look at the first matrix:
#     [1, 5, 5, 2]
#     [9, -6, 2, 8]
#     [-3, 7, -9, 1]
# Compare each column for its maximum: column 0 is (1, 9, -3), whose maximum 9
# has index 1; column 1 is (5, -6, 7), whose maximum 7 has index 2; and so on.
# For the first matrix the index result is therefore [1, 2, 0, 1].
# Applying the same method to the second matrix gives [1, 0, 2, 1].
# There are three inner matrices in total, so the final result c is a 3x4 matrix.
print(c)
# [[1 2 0 1]
#  [1 0 2 1]
#  [0 1 2 1]]
```
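For contrast, here is the same array with the other two axis values (my own addition; only the result shapes are shown to keep it short):

```python
# axis=0 compares corresponding positions across the three inner matrices;
# axis=2 compares along each row of each inner matrix.
print(np.argmax(a, axis=0).shape)  # (3, 4)
print(np.argmax(a, axis=2).shape)  # (3, 3)
```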
4. loss_W = lambda W: self.loss(x, t)
```python
f = lambda x: my_test(x)  # code 1
# is equivalent to:
def f(x):
    return my_test(x)
```
Here a lambda (anonymous function) is used to define a function f; the anonymous function is "promoted" and now has a name. The formal parameter is x, and the function body is the part after the colon. Note that this only defines f; it does not call it.
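It may look strange that loss_W takes a parameter W and then ignores it. This works because the book's numerical_gradient (from common/gradient.py) perturbs each element of the parameter array in place and re-evaluates the loss after every perturbation, so loss_W just needs to recompute self.loss(x, t) with whatever the current parameters are. Below is a minimal sketch of my own illustrating that mechanism with a made-up square loss, not the book's actual code:

```python
import numpy as np

def numerical_gradient_sketch(f, x, h=1e-4):
    # Central differences: nudge each element of x in place, re-evaluate f,
    # then restore the original value.
    grad = np.zeros_like(x)
    it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
    while not it.finished:
        idx = it.multi_index
        tmp = x[idx]
        x[idx] = tmp + h
        fxh1 = f(x)        # f sees the perturbed x even if it ignores its argument
        x[idx] = tmp - h
        fxh2 = f(x)
        grad[idx] = (fxh1 - fxh2) / (2 * h)
        x[idx] = tmp       # restore
        it.iternext()
    return grad

W = np.array([1.0, 2.0, 3.0])
loss_W = lambda _: np.sum(W ** 2)  # ignores its argument; reads W from the closure
print(numerical_gradient_sketch(loss_W, W))  # approximately [2. 4. 6.]
```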
5. x.shape[0]
```python
import numpy as np
x = np.array([[1, 2, 5], [2, 3, 5], [3, 4, 5], [2, 3, 6]])
# Print both the number of rows and the number of columns
print(x.shape)     # (4, 3)
# Rows only
print(x.shape[0])  # 4
# Columns only
print(x.shape[1])  # 3
```
6. np.dot(x, y), where x is an m×n matrix and y is an n×p matrix
The result is their m×p matrix product.
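A quick shape check of my own:

```python
import numpy as np

x = np.random.randn(2, 3)  # m x n with m=2, n=3
y = np.random.randn(3, 4)  # n x p with p=4
print(np.dot(x, y).shape)  # (2, 4), i.e. the m x p matrix product
```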
2. Mini-batch training on MNIST

```python
# coding: utf-8
import sys, os
sys.path.append(os.pardir)  # set up the path so files in the parent directory can be imported
import numpy as np
import matplotlib.pyplot as plt
from dataset.mnist import load_mnist
from two_layer_net import TwoLayerNet

# Load the data
(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, one_hot_label=True)

network = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)

iters_num = 10000  # set an appropriate number of iterations
train_size = x_train.shape[0]
batch_size = 100
learning_rate = 0.1

train_loss_list = []
train_acc_list = []
test_acc_list = []

iter_per_epoch = max(train_size / batch_size, 1)

for i in range(iters_num):
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]

    # Compute the gradient
    # grad = network.numerical_gradient(x_batch, t_batch)
    grad = network.gradient(x_batch, t_batch)

    # Update the parameters
    for key in ('W1', 'b1', 'W2', 'b2'):
        network.params[key] -= learning_rate * grad[key]

    loss = network.loss(x_batch, t_batch)
    train_loss_list.append(loss)

    # Record accuracy once per epoch
    if i % iter_per_epoch == 0:
        train_acc = network.accuracy(x_train, t_train)
        test_acc = network.accuracy(x_test, t_test)
        train_acc_list.append(train_acc)
        test_acc_list.append(test_acc)
        print("train acc, test acc | " + str(train_acc) + ", " + str(test_acc))

# Plot the accuracy curves
markers = {'train': 'o', 'test': 's'}
x = np.arange(len(train_acc_list))
plt.plot(x, train_acc_list, label='train acc')
plt.plot(x, test_acc_list, label='test acc', linestyle='--')
plt.xlabel("epochs")
plt.ylabel("accuracy")
plt.ylim(0, 1.0)
plt.legend(loc='lower right')
plt.show()
```
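One detail worth noting in the loop above: np.random.choice(train_size, batch_size) draws batch_size random indices in [0, train_size), and indexing x_train with that index array selects the mini-batch. A small demonstration of my own:

```python
import numpy as np

# Draw 5 random indices out of 60000; sampling is with replacement by default
batch_mask = np.random.choice(60000, 5)
print(batch_mask)  # e.g. [ 8013 14666 58210   936 21312] (values differ per run)
```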
1. train_loss_list.append(loss)
Appends the loss computed at each iteration to train_loss_list.
```python
a = []
a.append(['A', 'B', 'C'])  # append a list to the list
a.append('a')              # append a string; strings must be quoted
a.append(5)                # ints and floats take no quotes; quoting them would turn them into strings
print(a)
# Output:
# [['A', 'B', 'C'], 'a', 5]
```