Loss Functions - 06
In deep learning, the loss measures the gap between the model's final predictions and the ground-truth values. It can be used to judge how well training is going and whether the model has converged; typical examples are the mean squared error loss and the cross-entropy loss. In PyTorch, a loss function can be treated as a layer of the network and placed in the model definition, but in practice it is more often used as a functional call inside the forward pass.
PyTorch provides the various loss functions in both torch.nn and torch.nn.functional. Generally speaking, since loss functions contain no learnable parameters, the two are functionally almost identical.
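A minimal sketch of that equivalence, using the mean squared error loss mentioned above (the tensors here are made up purely for illustration): the class form nn.MSELoss and the function form F.mse_loss return the same value.

import torch
from torch import nn
import torch.nn.functional as F

pred = torch.randn(4, 2)            # hypothetical predictions
target = torch.randn(4, 2)          # hypothetical ground truth

mse_layer = nn.MSELoss()            # class form: instantiate once, then call
loss_a = mse_layer(pred, target)
loss_b = F.mse_loss(pred, target)   # functional form: call directly

print(loss_a, loss_b)               # the two values are identical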
perception.py

import torch
from torch import nn

# First build a fully connected sub-module that inherits from nn.Module
class Linear(nn.Module):

    def __init__(self, in_dim, out_dim):
        super(Linear, self).__init__()  # call the constructor of nn.Module

        # Use nn.Parameter to create the learnable parameters
        self.w = nn.Parameter(torch.randn(in_dim, out_dim))
        self.b = nn.Parameter(torch.randn(out_dim))

    def forward(self, x):
        x = x.matmul(self.w)            # Tensor.matmul performs the matrix multiplication
        y = x + self.b.expand_as(x)     # Tensor.expand_as() keeps the shapes consistent
        return y

# Build the perceptron class, inheriting from nn.Module and reusing the Linear sub-module
class Perception(nn.Module):
    def __init__(self, in_dim, hid_dim, out_dim):
        super(Perception, self).__init__()
        self.layer1 = Linear(in_dim, hid_dim)
        self.layer2 = Linear(hid_dim, out_dim)

    def forward(self, x):
        x = self.layer1(x)
        y = torch.sigmoid(x)
        y = self.layer2(y)
        y = torch.sigmoid(y)
        return y
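The hand-written Linear above mainly demonstrates nn.Parameter. As a sketch (not part of the original perception.py), the same two-layer perceptron could be assembled from PyTorch's built-in nn.Linear and nn.Sequential, similar in spirit to the perception_sequential module referenced in the commented-out code further below:

import torch
from torch import nn

# Hypothetical alternative built from PyTorch's own layers
class PerceptionSequential(nn.Module):
    def __init__(self, in_dim, hid_dim, out_dim):
        super(PerceptionSequential, self).__init__()
        self.layer = nn.Sequential(
            nn.Linear(in_dim, hid_dim),
            nn.Sigmoid(),
            nn.Linear(hid_dim, out_dim),
            nn.Sigmoid(),
        )

    def forward(self, x):
        return self.layer(x)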
test.py

import torch
from torch import nn
import torch.nn.functional as F

from perception import Perception

# Instantiate a network and set the fully connected dimensions;
# the 2-dimensional output corresponds to binary classification
p = Perception(2, 3, 2)
# p contains the layer1 and layer2 defined above

# named_parameters() returns an iterator over the learnable parameters,
# yielding each parameter's name and value
for name, parameter in p.named_parameters():
    print(name, parameter)

# Randomly generate data; 4 is the number of samples, each sample has 2 dimensions
data = torch.randn(4, 2)
print(data, data.size())

# Feed the data into p; calling p() is equivalent to calling Perception's forward()
output = p(data)
print(output)

# Set the labels; with binary classification and 4 samples, the label tensor has
# size 4, each entry being class 0 or 1
label = torch.Tensor([0, 1, 1, 0]).long()
# Instantiate the cross-entropy loss class from nn
criterion = nn.CrossEntropyLoss()
# Compute the loss by calling the criterion instance
loss_nn = criterion(output, label)
print(loss_nn)

# F.cross_entropy is a function, so it can be called directly without instantiation;
# the two give the same loss value
loss_functional = F.cross_entropy(output, label)
print(loss_functional)
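The loss tensors above still carry a grad_fn, so they can drive backpropagation. A minimal sketch continuing from test.py (the optimizer and learning rate are illustrative choices, not part of the original script):

# Backpropagate the loss and inspect the resulting gradients
loss_nn.backward()
for name, parameter in p.named_parameters():
    print(name, parameter.grad.shape)

# A single illustrative SGD update using the accumulated gradients
optimizer = torch.optim.SGD(p.parameters(), lr=0.1)
optimizer.step()
optimizer.zero_grad()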
perception-new.py
Note: this file mainly demonstrates an equivalent computation of nn.CrossEntropyLoss() in three steps.

# import torch
# from perception_sequential import Perception

# model = Perception(100, 1000, 10)
# print(model)

# data = torch.randn(5, 100)
# output = model(data)
# print(data)
# print(output)

import torch
from torch import nn
import torch.nn.functional as F

from perception import Perception

p = Perception(2, 3, 2)
print(p)
>> Perception(
     (layer1): Linear()
     (layer2): Linear()
   )

for name, parameters in p.named_parameters():
    print(name, parameters)
>> layer1.w Parameter containing:
   tensor([[-1.0572e+00,  4.9161e-01,  1.7658e+00],
           [ 4.7404e-01,  5.6086e-04,  3.2858e-02]], requires_grad=True)
   layer1.b Parameter containing:
   tensor([ 1.4106, -0.2539,  0.1568], requires_grad=True)
   layer2.w Parameter containing:
   tensor([[-0.9253, -0.3660],
           [ 1.3147,  1.3661],
           [ 0.5274, -0.3836]], requires_grad=True)
   layer2.b Parameter containing:
   tensor([ 0.0343, -0.4531], requires_grad=True)

data = torch.randn(4, 2)
print(data, data.size(), data.grad_fn)
>> tensor([[-1.1252,  1.3828],
           [ 0.1377,  0.3665],
           [ 0.5247,  0.4930],
           [ 1.5374,  0.1016]]) torch.Size([4, 2]) None

output = p(data)
print(output, output.size())
>> tensor([[0.4073, 0.3920],
           [0.5498, 0.4110],
           [0.5978, 0.4182],
           [0.7167, 0.4668]], grad_fn=<SigmoidBackward0>) torch.Size([4, 2])

label = torch.Tensor([0, 1, 1, 0]).long()
print(label, label.size())
>> tensor([0, 1, 1, 0]) torch.Size([4])

criterion = nn.CrossEntropyLoss()
loss_nn = criterion(output, label)
print(loss_nn)
>> tensor(0.7034, grad_fn=<NllLossBackward0>)

loss_functional = F.cross_entropy(output, label)
print(loss_functional)
>> tensor(0.7034, grad_fn=<NllLossBackward0>)

print(output)
>> tensor([[0.4073, 0.3920],
           [0.5498, 0.4110],
           [0.5978, 0.4182],
           [0.7167, 0.4668]], grad_fn=<SigmoidBackward0>)

# nn.CrossEntropyLoss() is equivalent to the following computation
# 1. Apply softmax
softmax = nn.Softmax(dim=1)
output_softmax = softmax(output)
print(output_softmax)
>> tensor([[0.5038, 0.4962],
           [0.5347, 0.4653],
           [0.5448, 0.4552],
           [0.5622, 0.4378]], grad_fn=<SoftmaxBackward0>)

# 2. Take the log
output_log = torch.log(output_softmax)
print(output_log)
>> tensor([[-0.6855, -0.7008],
           [-0.6261, -0.7650],
           [-0.6074, -0.7870],
           [-0.5760, -0.8259]], grad_fn=<LogSoftmaxBackward0>)

# The two steps above are equivalent to the following:
# ls = nn.LogSoftmax(dim=1)
# print(ls(output))
# >> tensor([[-0.6855, -0.7008],
#            [-0.6261, -0.7650],
#            [-0.6074, -0.7870],
#            [-0.5760, -0.8259]], grad_fn=<LogSoftmaxBackward0>)

# 3. Average the per-sample losses
# Pick out the log-probability of the correct class for each sample
loss = output_log[range(len(output)), label]
print(loss)
>> tensor([-0.6855, -0.7650, -0.7870, -0.5760], grad_fn=<IndexBackward0>)

# Cross entropy is the negative mean of these log-probabilities;
# abs() gives the same value here because log-probabilities are non-positive
loss = abs(sum(loss) / len(loss))
print(loss)
>> tensor(0.7034, grad_fn=<AbsBackward0>)
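The three steps can also be collapsed into a single call chain: cross entropy is the negative mean log-probability assigned to the correct class, i.e. log-softmax followed by the negative log-likelihood loss. A short sketch using the output and label values recorded above (F.log_softmax and F.nll_loss are standard PyTorch functions):

import torch
import torch.nn.functional as F

output = torch.tensor([[0.4073, 0.3920],
                       [0.5498, 0.4110],
                       [0.5978, 0.4182],
                       [0.7167, 0.4668]])
label = torch.tensor([0, 1, 1, 0])

# log_softmax fuses steps 1 and 2; nll_loss performs step 3
loss = F.nll_loss(F.log_softmax(output, dim=1), label)
print(loss)   # approximately 0.7034, matching nn.CrossEntropyLoss / F.cross_entropy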
Reference: https://blog.csdn.net/Lucinda6/article/details/116162198
Understanding cross-entropy loss: https://zhuanlan.zhihu.com/p/35709485