yolov3代码详解2-yolo的预测

from collections import OrderedDict

import torch
import torch.nn as nn

from nets.darknet import darknet53

                yolov3的结构框图

 

首先是主干网络最后一层输出的特征图:经过五次卷积处理后,一方面继续经过两次卷积得到yolo的预测输出,另一方面经过1x1卷积和上采样送往下一个尺度;其他两个特征层会与上采样后的数据进行concat(按通道拼接),然后再进行同样的卷积操作。

                        

自定义的same padding卷积

def conv2d(filter_in, filter_out, kernel_size):
    """Build a stride-1 Conv2d -> BatchNorm2d -> LeakyReLU(0.1) stack.

    The convolution is padded with ``(kernel_size - 1) // 2`` pixels, so with
    stride 1 an odd kernel leaves the spatial size unchanged ("same"
    padding). A falsy ``kernel_size`` gets zero padding.

    Args:
        filter_in: Number of input channels.
        filter_out: Number of output channels.
        kernel_size: Side length of the square convolution kernel.

    Returns:
        An ``nn.Sequential`` whose sub-modules are named ``conv``, ``bn``
        and ``relu``, in that order.
    """
    if kernel_size:
        pad = (kernel_size - 1) // 2
    else:
        pad = 0

    stages = OrderedDict()
    # No bias on the convolution: the BatchNorm that follows has its own shift.
    stages["conv"] = nn.Conv2d(
        filter_in,
        filter_out,
        kernel_size=kernel_size,
        stride=1,
        padding=pad,
        bias=False,
    )
    stages["bn"] = nn.BatchNorm2d(filter_out)
    stages["relu"] = nn.LeakyReLU(0.1)
    return nn.Sequential(stages)

定义的最后七层的卷积操作

def make_last_layers(filters_list, in_filters, out_filter):
    """Build the seven-layer prediction head used at one detection scale.

    The head is six ``conv2d`` blocks with alternating 1x1 / 3x3 kernels,
    followed by a plain biased 1x1 ``nn.Conv2d`` that produces the raw
    predictions. The 1x1 blocks output ``filters_list[0]`` channels and the
    3x3 blocks output ``filters_list[1]`` channels.

    Args:
        filters_list: Indexable pair ``[narrow, wide]`` of channel widths.
        in_filters: Number of channels entering the head.
        out_filter: Number of channels produced by the final convolution.

    Returns:
        An ``nn.ModuleList`` holding the seven layers in execution order.
    """
    narrow = filters_list[0]
    wide = filters_list[1]

    head = nn.ModuleList()
    channels = in_filters
    for kernel in (1, 3, 1, 3, 1, 3):
        width = narrow if kernel == 1 else wide
        head.append(conv2d(channels, width, kernel))
        channels = width

    # Final projection: plain convolution (no BatchNorm / activation), with bias.
    head.append(
        nn.Conv2d(wide, out_filter, kernel_size=1, stride=1, padding=0, bias=True)
    )
    return head

该函数返回的是一个包含七个卷积层的nn.ModuleList:前六个是conv2d(卷积+BN+LeakyReLU),最后一个是普通的1x1卷积(nn.Conv2d)。

定义yolo预测的类

class YoloBody(nn.Module):

首先构建darknet53作为主干网络(是实例化,而不是类的继承),取得主干各层的输出通道数,然后计算三个尺度上yolo最终预测的输出通道数,最后搭建yolo预测的结构

    def __init__(self, anchor, num_classes):
        """Create the backbone and the three detection heads.

        Args:
            anchor: Indexable with at least three entries; ``len(anchor[i])``
                is the number of prior boxes predicted by head ``i``.
            num_classes: Number of object classes.
        """
        super().__init__()

        # Darknet-53 backbone, built by calling darknet53(None).
        self.backbone = darknet53(None)

        # Per-stage output channel counts reported by the backbone.
        # NOTE(review): the original author's note gives
        # [64, 128, 256, 512, 1024] -- confirm against nets.darknet.
        stage_channels = self.backbone.layers_out_filters

        # Each prior box predicts (x, y, w, h, confidence) plus one score per
        # class, e.g. 3 * (5 + 20) = 75 channels for 3 priors and 20 classes.
        per_box = 5 + num_classes
        head_channels = [len(anchor[i]) * per_box for i in range(3)]

        # Head 0 consumes the deepest backbone feature map directly.
        self.last_layer0 = make_last_layers(
            [512, 1024], stage_channels[-1], head_channels[0]
        )

        # Head 1: the 512-channel branch of head 0 is reduced to 256 channels,
        # upsampled 2x and concatenated with the second-deepest feature map.
        self.last_layer1_conv = conv2d(512, 256, 1)
        self.last_layer1_upsample = nn.Upsample(scale_factor=2, mode='nearest')
        self.last_layer1 = make_last_layers(
            [256, 512], stage_channels[-2] + 256, head_channels[1]
        )

        # Head 2: same scheme one level shallower (256 -> 128 channels).
        self.last_layer2_conv = conv2d(256, 128, 1)
        self.last_layer2_upsample = nn.Upsample(scale_factor=2, mode='nearest')
        self.last_layer2 = make_last_layers(
            [128, 256], stage_channels[-3] + 128, head_channels[2]
        )

前向流动过程

    def forward(self, x):
        """Run the backbone and the three detection heads.

        Args:
            x: Input passed straight to ``self.backbone``.

        Returns:
            Tuple ``(out0, out1, out2)`` with the raw predictions of head 0
            (fed by the deepest feature map), head 1 and head 2.
        """

        def _run_head(head, feat):
            # Apply every layer of the head in order and remember the
            # activation after the fifth layer (index 4): that tensor is what
            # gets forwarded to the next, higher-resolution head.
            for idx, layer in enumerate(head):
                feat = layer(feat)
                if idx == 4:
                    tap = feat
            return feat, tap

        # Three backbone feature maps, shallowest first.
        # NOTE(review): the original notes give 52x52x256, 26x26x512 and
        # 13x13x1024 -- presumably for a 416x416 input; confirm.
        feat2, feat1, feat0 = self.backbone(x)

        # ---- head 0: deepest feature map ------------------------------ #
        # out0 is the prediction; tap0 is the 5th-layer activation.
        out0, tap0 = _run_head(self.last_layer0, feat0)

        # ---- head 1 ---------------------------------------------------- #
        # Reduce channels with the 1x1 conv, upsample 2x, then concatenate
        # with the middle backbone feature map along the channel axis.
        up1 = self.last_layer1_upsample(self.last_layer1_conv(tap0))
        merged1 = torch.cat((up1, feat1), dim=1)
        out1, tap1 = _run_head(self.last_layer1, merged1)

        # ---- head 2: shallowest feature map ---------------------------- #
        up2 = self.last_layer2_upsample(self.last_layer2_conv(tap1))
        merged2 = torch.cat((up2, feat2), dim=1)
        out2, _ = _run_head(self.last_layer2, merged2)

        return out0, out1, out2

 

posted @ 2021-06-22 14:55  Liang-ml  阅读(249)  评论(0)    收藏  举报