yolov3代码详解2-yolo的预测
from collections import OrderedDict import torch import torch.nn as nn from nets.darknet import darknet53

yolov3的结构框图
首先,主干网络最后一层输出的特征图经过五次卷积处理后分成两路:一路再经过两次卷积,得到yolo的预测输出;另一路经过1×1卷积后做上采样。其余两个特征层分别与上采样后的数据进行拼接(concat),然后重复同样的卷积操作。

自定义的same padding卷积
def conv2d(filter_in, filter_out, kernel_size):
    """Build a stride-1 Conv2d -> BatchNorm2d -> LeakyReLU(0.1) stack.

    The padding is ``(kernel_size - 1) // 2``, which keeps the spatial size
    unchanged for odd kernel sizes at stride 1.

    Args:
        filter_in: Number of input channels.
        filter_out: Number of output channels.
        kernel_size: Square kernel size; a falsy value results in zero padding.

    Returns:
        An ``nn.Sequential`` whose sub-modules are named ``"conv"``, ``"bn"``
        and ``"relu"``. The convolution has no bias term.
    """
    if kernel_size:
        padding = (kernel_size - 1) // 2
    else:
        padding = 0

    # Sub-modules are created in conv -> bn -> relu order and registered under
    # those names, so parameter keys look like "conv.weight" / "bn.weight".
    stages = OrderedDict()
    stages["conv"] = nn.Conv2d(
        filter_in,
        filter_out,
        kernel_size=kernel_size,
        stride=1,
        padding=padding,
        bias=False,
    )
    stages["bn"] = nn.BatchNorm2d(filter_out)
    stages["relu"] = nn.LeakyReLU(0.1)
    return nn.Sequential(stages)
定义的最后七层的卷积操作
def make_last_layers(filters_list, in_filters, out_filter):
    """Assemble the seven-layer head used at one prediction scale.

    The head consists of six ``conv2d`` blocks alternating between a 1x1
    kernel (output ``filters_list[0]`` channels) and a 3x3 kernel (output
    ``filters_list[1]`` channels), followed by a plain biased 1x1 ``nn.Conv2d``
    that maps to ``out_filter`` channels.

    Args:
        filters_list: Sequence whose first two entries are the channel counts
            produced by the 1x1 and the 3x3 blocks respectively.
        in_filters: Number of channels entering the first block.
        out_filter: Number of channels produced by the final convolution.

    Returns:
        An ``nn.ModuleList`` holding the seven layers in execution order.
    """
    narrow = filters_list[0]
    wide = filters_list[1]

    layers = []
    channels = in_filters
    for kernel in (1, 3, 1, 3, 1, 3):
        # 1x1 blocks squeeze to the narrow width, 3x3 blocks expand again.
        target = narrow if kernel == 1 else wide
        layers.append(conv2d(channels, target, kernel))
        channels = target

    # Final projection: no batch norm or activation, bias enabled.
    layers.append(
        nn.Conv2d(wide, out_filter, kernel_size=1, stride=1, padding=0, bias=True)
    )
    return nn.ModuleList(layers)
该函数返回的是一个包含七层的 nn.ModuleList:前六层是上面定义的 conv2d 卷积块(1×1 与 3×3 交替),最后一层是带偏置的普通 1×1 卷积。
定义yolo预测的类
class YoloBody(nn.Module):
    """YOLOv3 prediction head stacked on a Darknet-53 backbone.

    The constructor takes the backbone's per-stage output channel counts,
    computes the number of prediction channels for each of the three scales,
    and builds one seven-layer head per scale plus the 1x1 conv + upsample
    links that feed a coarser scale into the next finer one.
    """

    # 0-based index of the layer inside each seven-layer head after which the
    # activation is tapped and routed to the next (finer) scale, i.e. the
    # result of the fifth convolution.
    _BRANCH_INDEX = 4

    def __init__(self, anchor, num_classes):
        """Build the backbone and the three prediction heads.

        Args:
            anchor: Indexable with at least three entries; ``len(anchor[i])``
                is the number of prior boxes used at scale ``i``.
            num_classes: Number of object classes to predict.
        """
        super().__init__()

        # Darknet-53 backbone model.
        self.backbone = darknet53(None)

        # Per-stage output channels reported by the backbone
        # (per the original author's note: [64, 128, 256, 512, 1024]).
        out_filters = self.backbone.layers_out_filters

        # Each prior box predicts (x, y, w, h, confidence) plus one score per
        # class, so a scale emits len(anchor[i]) * (5 + num_classes) channels.
        # Example: 3 boxes and the 20 VOC classes give 3 * (5 + 20) = 75.
        final_out_filter0 = len(anchor[0]) * (5 + num_classes)
        final_out_filter1 = len(anchor[1]) * (5 + num_classes)
        final_out_filter2 = len(anchor[2]) * (5 + num_classes)

        # Head for the coarsest scale, fed directly by the last backbone stage.
        self.last_layer0 = make_last_layers(
            [512, 1024], out_filters[-1], final_out_filter0
        )

        # Middle scale: reduce the tapped map to 256 channels, upsample x2 and
        # concatenate with the second-to-last backbone stage.
        self.last_layer1_conv = conv2d(512, 256, 1)
        self.last_layer1_upsample = nn.Upsample(scale_factor=2, mode='nearest')
        self.last_layer1 = make_last_layers(
            [256, 512], out_filters[-2] + 256, final_out_filter1
        )

        # Finest scale: same pattern with 128 channels and the
        # third-to-last backbone stage.
        self.last_layer2_conv = conv2d(256, 128, 1)
        self.last_layer2_upsample = nn.Upsample(scale_factor=2, mode='nearest')
        self.last_layer2 = make_last_layers(
            [128, 256], out_filters[-3] + 128, final_out_filter2
        )

    def _run_head(self, head, features):
        """Run ``features`` through ``head`` and capture the branch activation.

        Args:
            head: Iterable of callables (the layers of one prediction head).
            features: Input to the first layer.

        Returns:
            ``(output, branch)`` where ``output`` is the result of the last
            layer and ``branch`` is the activation right after the layer at
            ``_BRANCH_INDEX``.

        Raises:
            ValueError: If ``head`` has too few layers to reach the branch
                point.
        """
        branch = None
        for index, layer in enumerate(head):
            features = layer(features)
            if index == self._BRANCH_INDEX:
                branch = features
        if branch is None:
            raise ValueError(
                "prediction head needs at least %d layers" % (self._BRANCH_INDEX + 1)
            )
        return features, branch

    def forward(self, x):
        """Forward pass: compute the predictions for the three scales.

        Args:
            x: Input batch passed to the backbone.

        Returns:
            ``(out0, out1, out2)``: predictions from the coarsest to the
            finest scale. Per the original author's notes, a 416x416 input
            gives 13x13, 26x26 and 52x52 grids respectively.
        """
        # The backbone yields three feature maps, finest first. Original notes
        # give their shapes as 52x52x256, 26x26x512 and 13x13x1024.
        x2, x1, x0 = self.backbone(x)

        # First scale (13x13 per the notes). out0_branch is the activation
        # before the last two convolutions of the head.
        out0, out0_branch = self._run_head(self.last_layer0, x0)

        # 512 -> 256 channels, upsample x2, then concatenate with x1 along the
        # channel axis (256 + 512 -> 768 channels per the notes).
        x1_in = self.last_layer1_conv(out0_branch)
        x1_in = self.last_layer1_upsample(x1_in)
        x1_in = torch.cat([x1_in, x1], 1)

        # Second scale (26x26 per the notes).
        out1, out1_branch = self._run_head(self.last_layer1, x1_in)

        # 256 -> 128 channels, upsample x2, then concatenate with x2
        # (128 + 256 -> 384 channels per the notes).
        x2_in = self.last_layer2_conv(out1_branch)
        x2_in = self.last_layer2_upsample(x2_in)
        x2_in = torch.cat([x2_in, x2], 1)

        # Third scale (52x52 per the notes); its branch activation is unused.
        out2, _ = self._run_head(self.last_layer2, x2_in)
        return out0, out1, out2

浙公网安备 33010602011771号