Reading Notes on 《深度学习入门-基于Python的理论与实现》 (Deep Learning from Scratch), Part 04

Implementing the Learning Algorithm

1. A class for a two-layer neural network

# coding: utf-8
import sys, os
sys.path.append(os.pardir)  # setting for importing files from the parent directory
import numpy as np
from common.functions import *
from common.gradient import numerical_gradient


class TwoLayerNet:

    def __init__(self, input_size, hidden_size, output_size, weight_init_std=0.01):
        # initialize the weights
        self.params = {}
        self.params['W1'] = weight_init_std * np.random.randn(input_size, hidden_size)
        self.params['b1'] = np.zeros(hidden_size)
        self.params['W2'] = weight_init_std * np.random.randn(hidden_size, output_size)
        self.params['b2'] = np.zeros(output_size)

    def predict(self, x):
        W1, W2 = self.params['W1'], self.params['W2']
        b1, b2 = self.params['b1'], self.params['b2']

        a1 = np.dot(x, W1) + b1
        z1 = sigmoid(a1)
        a2 = np.dot(z1, W2) + b2
        y = softmax(a2)

        return y

    # x: input data, t: supervision (label) data
    def loss(self, x, t):
        y = self.predict(x)

        return cross_entropy_error(y, t)

    def accuracy(self, x, t):
        y = self.predict(x)
        y = np.argmax(y, axis=1)
        t = np.argmax(t, axis=1)

        accuracy = np.sum(y == t) / float(x.shape[0])
        return accuracy

    # x: input data, t: supervision (label) data
    def numerical_gradient(self, x, t):
        loss_W = lambda W: self.loss(x, t)

        grads = {}
        grads['W1'] = numerical_gradient(loss_W, self.params['W1'])
        grads['b1'] = numerical_gradient(loss_W, self.params['b1'])
        grads['W2'] = numerical_gradient(loss_W, self.params['W2'])
        grads['b2'] = numerical_gradient(loss_W, self.params['b2'])

        return grads

    def gradient(self, x, t):
        W1, W2 = self.params['W1'], self.params['W2']
        b1, b2 = self.params['b1'], self.params['b2']
        grads = {}

        batch_num = x.shape[0]

        # forward
        a1 = np.dot(x, W1) + b1
        z1 = sigmoid(a1)
        a2 = np.dot(z1, W2) + b2
        y = softmax(a2)

        # backward
        dy = (y - t) / batch_num
        grads['W2'] = np.dot(z1.T, dy)
        grads['b2'] = np.sum(dy, axis=0)

        da1 = np.dot(dy, W2.T)
        dz1 = sigmoid_grad(a1) * da1
        grads['W1'] = np.dot(x.T, dz1)
        grads['b1'] = np.sum(dz1, axis=0)

        return grads
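To sanity-check the fast analytic gradient() against the slower numerical_gradient(), the two can be compared on a small batch. A minimal sketch, assuming the class above is saved as two_layer_net.py and using made-up random data in place of MNIST:

import numpy as np
from two_layer_net import TwoLayerNet

net = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)
x = np.random.rand(3, 784)      # 3 made-up input samples
t = np.zeros((3, 10))
t[np.arange(3), [1, 4, 7]] = 1  # made-up one-hot labels

grad_numerical = net.numerical_gradient(x, t)
grad_backprop = net.gradient(x, t)
for key in grad_numerical:
    diff = np.average(np.abs(grad_backprop[key] - grad_numerical[key]))
    print(key + ": " + str(diff))  # each difference should be tiny, around 1e-10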

Key points involved:

1. numpy.random.randn(d0, d1, …, dn)

  • randn returns one sample, or an array of samples, drawn from the standard normal distribution.

  • each argument dn gives the size of one dimension

  • the return value is an array of the specified shape

np.random.randn(2, 4)

array([[ 0.27795239, -2.57882503,  0.3817649 ,  1.42367345],
       [-1.16724625, -0.22408299,  0.63006614, -0.41714538]])

About the standard normal distribution

  • The standard normal distribution, also called the u-distribution, is the normal distribution with mean 0 and standard deviation 1, written N(0, 1).
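
A quick empirical check of this (my own sketch): with a large number of samples, the sample mean and standard deviation of randn output should be close to 0 and 1.

import numpy as np

samples = np.random.randn(100000)
print(samples.mean())  # close to 0
print(samples.std())   # close to 1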

 

2. numpy.zeros(shape, dtype=float, order='C')

Returns a new array of the given shape and type, filled with zeros (i.e., it generates a zero matrix of the corresponding size).
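
For example, a small sketch:

import numpy as np

print(np.zeros(3))                  # [0. 0. 0.]
print(np.zeros((2, 3), dtype=int))  # a 2x3 integer zero matrix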

 

3. numpy.argmax(array, axis) returns the indices of the maximum values in a numpy array.

For a multi-dimensional array, np.argmax(a, axis=1)

compares along the column direction, that is, within each inner matrix the comparison runs down each column.

import numpy as np

a = np.array([
    [
        [1, 5, 5, 2],
        [9, -6, 2, 8],
        [-3, 7, -9, 1]
    ],
    [
        [-1, 7, -5, 2],
        [9, 6, 2, 8],
        [3, 7, 9, 1]
    ],
    [
        [21, 6, -5, 2],
        [9, 36, 2, 8],
        [3, 7, 79, 1]
    ]
])
c = np.argmax(a, axis=1)  # a is 3-D, so it has three axes: 0, 1 and 2
# With axis=1 the maximum is searched along axis 1, i.e. down the columns
# inside each inner matrix.
# (1) Take the first matrix:
#     [1, 5, 5, 2],
#     [9, -6, 2, 8],
#     [-3, 7, -9, 1]
# Compare within each column: in column 0 the maximum of 1, 9, -3 is 9, at index 1;
# in column 1 the maximum of 5, -6, 7 is 7, at index 2; and so on.
# So for the first matrix the index result is [1, 2, 0, 1].
# Applying the same method to the second matrix gives [1, 0, 2, 1].
# There are three matrices in total, so the final result c is a 3x4 matrix.
print(c)
# [[1 2 0 1]
#  [1 0 2 1]
#  [0 1 2 1]]
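
In the accuracy() method of TwoLayerNet above, argmax is applied to a 2-D array of scores; a smaller sketch of that case:

import numpy as np

y = np.array([[0.1, 0.8, 0.1],
              [0.3, 0.2, 0.5]])
print(np.argmax(y, axis=1))  # [1 2], the predicted class index for each row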

 

4. loss_W = lambda W: self.loss(x, t)

f = lambda x: my_test(x)  # code 1
# is equivalent to:
def f(x):
    return my_test(x)

Here a lambda (anonymous function) is used to define a function f; the anonymous function is, in effect, given a name. Its parameter is x, and its body is the part after the colon. Note that this only defines f, it does not call it.
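
In loss_W = lambda W: self.loss(x, t), the lambda also closes over x and t from the enclosing scope, which lets numerical_gradient treat the loss as a function of the weights alone (the argument W is not even read; numerical_gradient perturbs self.params in place and loss_W simply re-evaluates the loss). A toy sketch of that capture (my own example, not from the book):

x, t = 1.0, 2.0
loss = lambda W: (W * x - t) ** 2  # x and t are captured from the enclosing scope
print(loss(0.0))  # 4.0
print(loss(2.0))  # 0.0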

 

5. x.shape[0]

import numpy as np
x = np.array([[1, 2, 5], [2, 3, 5], [3, 4, 5], [2, 3, 6]])
# print the number of rows and columns of the array
print(x.shape)     # result: (4, 3)
# print only the number of rows
print(x.shape[0])  # result: 4
# print only the number of columns
print(x.shape[1])  # result: 3

 

6. np.dot(x, y): if x is an m×n matrix and y is an n×p matrix (the inner dimensions must match), the result is their m×p matrix product.

 
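A small shape check of this rule (my own sketch):

import numpy as np

A = np.random.rand(2, 3)   # m = 2, n = 3
B = np.random.rand(3, 4)   # n = 3, p = 4
print(np.dot(A, B).shape)  # (2, 4), the m×p matrix product
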

2. Implementing mini-batch training

# coding: utf-8
import sys, os
sys.path.append(os.pardir)  # setting for importing files from the parent directory
import numpy as np
import matplotlib.pyplot as plt
from dataset.mnist import load_mnist
from two_layer_net import TwoLayerNet

# load the data
(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, one_hot_label=True)

network = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)

iters_num = 10000  # set the number of iterations appropriately
train_size = x_train.shape[0]
batch_size = 100
learning_rate = 0.1

train_loss_list = []
train_acc_list = []
test_acc_list = []

iter_per_epoch = max(train_size / batch_size, 1)

for i in range(iters_num):
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]

    # compute the gradient (by backpropagation; the numerical version is far slower)
    #grad = network.numerical_gradient(x_batch, t_batch)
    grad = network.gradient(x_batch, t_batch)

    # update the parameters
    for key in ('W1', 'b1', 'W2', 'b2'):
        network.params[key] -= learning_rate * grad[key]

    loss = network.loss(x_batch, t_batch)
    train_loss_list.append(loss)

    if i % iter_per_epoch == 0:
        train_acc = network.accuracy(x_train, t_train)
        test_acc = network.accuracy(x_test, t_test)
        train_acc_list.append(train_acc)
        test_acc_list.append(test_acc)
        print("train acc, test acc | " + str(train_acc) + ", " + str(test_acc))

# plot the results
markers = {'train': 'o', 'test': 's'}
x = np.arange(len(train_acc_list))
plt.plot(x, train_acc_list, label='train acc')
plt.plot(x, test_acc_list, label='test acc', linestyle='--')
plt.xlabel("epochs")
plt.ylabel("accuracy")
plt.ylim(0, 1.0)
plt.legend(loc='lower right')
plt.show()
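
Each mini-batch is drawn with np.random.choice, which here picks batch_size random indices out of range(train_size) (with replacement, since replace=False is not passed). A tiny sketch of just that step:

import numpy as np

batch_mask = np.random.choice(10, 3)  # pick 3 indices out of 0..9
print(batch_mask)                     # e.g. [7 0 4]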

1. train_loss_list.append(loss)

Appends the loss computed at each iteration to train_loss_list.

a = []
a.append(['A', 'B', 'C'])  # append a list to the list
a.append('a')              # append a string; strings must be quoted
a.append(5)                # integers and floats need no quotes; quoting them would turn them into strings
print(a)
# output:
# [['A', 'B', 'C'], 'a', 5]

 
