SSD训练细节

sampling.py

import random
import torch
import torch.nn as nn
from torch.autograd import Variable
from torch.autograd import Function

from bbox import bboxIOU

# Names exported by `from <module> import *` for the sampling helpers below.
__all__ = ["buildPredBoxes", "sampleEzDetect"]

def buildPredBoxes(config):
    """Build the list of default (prior) boxes described by ``config``.

    Each entry of ``config.mboxes`` is read as ``[layer, boxWidth, boxHeight]``
    and ``config.featureSize[layer]`` as ``[width, height]`` of that feature
    map. One box is emitted per feature-map cell, centred on the cell centre
    expressed as a fraction of the map extent.

    Boxes are NOT clipped, so boxes near the border may extend below 0 or
    above 1.

    Returns:
        A list of ``[xmin, ymin, xmax, ymax]`` lists, ordered by mbox entry,
        then row (y), then column (x).
    """
    boxes = []

    for spec in config.mboxes:
        layer = spec[0]
        cols = config.featureSize[layer][0]
        rows = config.featureSize[layer][1]

        boxW = spec[1]
        boxH = spec[2]

        for row in range(rows):
            # Centre of the cell, as a fraction of the feature-map height.
            cy = (row + 0.5) / rows
            for col in range(cols):
                cx = (col + 0.5) / cols
                boxes.append([
                    cx - boxW / 2,
                    cy - boxH / 2,
                    cx + boxW / 2,
                    cy + boxH / 2,
                ])

    return boxes

def _storeSample(selectedSamples, slot, bbox, predIndex, labelSign=1):
    """Write one 6-value record into record slot ``slot`` of the flat buffer."""
    base = slot * 6
    selectedSamples[base + 1] = bbox[0] * labelSign
    selectedSamples[base + 2] = bbox[1]
    selectedSamples[base + 3] = bbox[2]
    selectedSamples[base + 4] = bbox[3]
    selectedSamples[base + 5] = bbox[4]
    selectedSamples[base + 6] = predIndex


def sampleEzDetect(config, bboxes):
    """Match ground-truth boxes to prior boxes and pack the selected samples.

    Args:
        config: object exposing ``predBoxes``, a sequence of
            ``[xmin, ymin, xmax, ymax]`` prior boxes.
        bboxes: sequence of ground-truth entries; elements 1..4 of each entry
            are used as the box for IoU, element 0 is copied through as the
            first value of the record (presumably the class label -- confirm
            with the dataset code).

    Returns:
        A float tensor of length ``128 * 1024``. Element 0 holds the number of
        records ``n``; record ``r`` occupies elements ``6*r + 1 .. 6*r + 6`` as
        ``[bbox[0], bbox[1], bbox[2], bbox[3], bbox[4], priorIndex]``. Unused
        elements are zero. With no ground truth (or no priors) the count is 0.

    Selection happens in two passes:
      1. Greedy one-to-one matching: repeatedly take the globally best
         (prior, truth) pair while its IoU exceeds 0.1, then retire both.
      2. Every remaining prior whose best IoU exceeds 0.7 is added as a
         sample; one whose best IoU is in (0.5, 0.7] is added with
         ``bbox[0]`` negated (presumably so the loss can treat it specially
         -- confirm with the loss code).
    """
    predBoxes = config.predBoxes

    # Zero-filled rather than uninitialised, so the unused tail is well defined.
    selectedSamples = torch.zeros(128 * 1024, dtype=torch.float32)

    numTruth = len(bboxes)
    if numTruth == 0 or len(predBoxes) == 0:
        # Nothing to match; max() over an empty IoU matrix would raise.
        return selectedSamples

    ## preparing ground truth
    truthBoxes = [[b[1], b[2], b[3], b[4]] for b in bboxes]

    ## computing iou: rows are priors, columns are ground-truth boxes
    iouMatrix = torch.FloatTensor(
        [[bboxIOU(p, t) for t in truthBoxes] for p in predBoxes]
    )
    # Second copy only has matched prior rows retired, so pass 2 can still see
    # every ground-truth column.
    iouMatrix2 = iouMatrix.clone()

    ii = 0

    ## positive samples from bi-direction match
    for _ in range(numTruth):
        iouValue, flatIndex = torch.max(iouMatrix.view(-1), 0)
        iouValue = iouValue.item()
        flatIndex = int(flatIndex.item())

        # The matrix is flattened row-major with numTruth columns, so the
        # quotient is the prior (row) and the remainder the truth (column).
        predIndex = flatIndex // numTruth
        bboxIndex = flatIndex % numTruth

        if not (iouValue > 0.1):
            break

        _storeSample(selectedSamples, ii, bboxes[bboxIndex], predIndex)
        ii += 1

        # Retire the matched truth column and prior row.
        iouMatrix[:, bboxIndex] = -1
        iouMatrix[predIndex, :] = -1
        iouMatrix2[predIndex, :] = -1

    ## also samples with high iou
    for predIndex in range(len(predBoxes)):
        bestIou, bestTruth = iouMatrix2[predIndex].max(0)
        bestIou = bestIou.item()
        bboxIndex = int(bestTruth.item())

        if bestIou > 0.7:
            # Prior overlapping a ground-truth box by more than 0.7: positive.
            _storeSample(selectedSamples, ii, bboxes[bboxIndex], predIndex)
            ii += 1
        elif bestIou > 0.5:
            # Intermediate overlap: stored with the first field negated.
            _storeSample(selectedSamples, ii, bboxes[bboxIndex], predIndex,
                         labelSign=-1)
            ii += 1

    selectedSamples[0] = ii
    return selectedSamples

encodeBox 和 decodeAllBox 的过程

def encodeBox(config, box, predBox):
    """Encode ``box`` as offsets relative to the prior box ``predBox``.

    Both boxes are ``[xmin, ymin, xmax, ymax]``.

    Args:
        config: unused; kept for a uniform call signature.
        box: the box to encode.
        predBox: the prior box used as reference.

    Returns:
        ``[ecx, ecy, ew, eh]`` where the centre offsets are divided by the
        prior size and scaled by 10, and the size ratios are log-encoded and
        scaled by 5.

    Raises:
        ZeroDivisionError: if ``predBox`` has zero width or height.
        ValueError: if ``box`` has non-positive width or height relative to
            the prior (``math.log`` domain error).
    """
    import math  # local import: the file-level import appears after this definition

    # Prior centre and size.
    pcx = (predBox[0] + predBox[2]) / 2
    pcy = (predBox[1] + predBox[3]) / 2
    pw = predBox[2] - predBox[0]
    ph = predBox[3] - predBox[1]

    # Centre displacement, in units of the prior size, scaled by 10.
    ecx = ((box[0] + box[2]) / 2 - pcx) / pw * 10
    ecy = ((box[1] + box[3]) / 2 - pcy) / ph * 10

    # Log of the size ratio, scaled by 5.
    ew = math.log((box[2] - box[0]) / pw) * 5
    eh = math.log((box[3] - box[1]) / ph) * 5

    return [ecx, ecy, ew, eh]

def decodeAllBox(config, allBox):
    """Decode a batch of encoded box offsets back to corner coordinates.

    Inverse of the 10 / 5 scaled centre-offset and log-size encoding: centre
    offsets are divided by 10 and scaled by the prior size, sizes are
    ``exp(value / 5)`` times the prior size.

    Args:
        config: object exposing ``predBoxes`` (sequence of
            ``[xmin, ymin, xmax, ymax]`` priors) and a truthy/falsy ``gpu``.
        allBox: tensor indexed as ``[batch, prior, channel]`` whose first four
            channels are ``[ecx, ecy, ew, eh]``.

    Returns:
        A float tensor with the same size as ``allBox``. For each prior listed
        in ``config.predBoxes``, channels 0..3 hold
        ``[xmin, ymin, xmax, ymax]`` with minima clamped to >= 0 and maxima
        clamped to <= 1. Any other element is zero. The result is moved to
        CUDA when ``config.gpu`` is truthy.
    """
    priors = torch.as_tensor(config.predBoxes, dtype=torch.float32).cpu().reshape(-1, 4)
    numPriors = priors.size(0)

    # Per-prior centre and size, shape (numPriors,).
    pcx = (priors[:, 0] + priors[:, 2]) / 2
    pcy = (priors[:, 1] + priors[:, 3]) / 2
    pw = priors[:, 2] - priors[:, 0]
    ph = priors[:, 3] - priors[:, 1]

    # (batch, numPriors, channels); the prior vectors broadcast over the batch.
    offsets = allBox.cpu().float()[:, :numPriors, :]

    dcx = offsets[:, :, 0] / 10 * pw + pcx
    dcy = offsets[:, :, 1] / 10 * ph + pcy
    dw = torch.exp(offsets[:, :, 2] / 5) * pw
    dh = torch.exp(offsets[:, :, 3] / 5) * ph

    # Zero-initialised so elements that are never written are well defined.
    newBoxes = torch.zeros(allBox.size(), dtype=torch.float32)
    newBoxes[:, :numPriors, 0] = torch.clamp(dcx - dw / 2, min=0)
    newBoxes[:, :numPriors, 1] = torch.clamp(dcy - dh / 2, min=0)
    newBoxes[:, :numPriors, 2] = torch.clamp(dcx + dw / 2, max=1)
    newBoxes[:, :numPriors, 3] = torch.clamp(dcy + dh / 2, max=1)

    if config.gpu:
        newBoxes = newBoxes.cuda()

    return newBoxes

import sys
import math
import torch

# Names exported by `from <module> import *` for the box utilities.
# NOTE(review): bboxIOU is exported here but its definition is not visible in this excerpt.
__all__ = ["bboxIOU", "encodeBox", "decodeAllBox", "doNMS"]

def doNMS(config, classMap, allBoxes, threshold, iouThreshold=0.5):
    """Per-class greedy non-maximum suppression.

    Args:
        config: object exposing ``classNumber``. Classes ``1 ..
            classNumber - 1`` are processed; column 0 is skipped (presumably
            the background class -- confirm with the model head).
        classMap: 2-D tensor indexed ``[box, class]`` of scores.
        allBoxes: 2-D tensor indexed ``[box, 0..3]`` of box coordinates.
        threshold: boxes scoring below this value are discarded.
        iouThreshold: a lower-scored box whose IoU with a kept box exceeds
            this value is suppressed. Defaults to 0.5.

    Returns:
        A list of ``[x0, x1, x2, x3]`` lists (elements are 0-dim tensors taken
        from ``allBoxes``), grouped by class in ascending class order and, in
        each class, ordered by descending score. Empty when nothing passes the
        threshold or ``classMap`` has no rows.
    """
    winBoxes = []

    for c in range(1, config.classNumber):
        scores, order = torch.sort(classMap[:, c], 0, descending=True)

        # Only boxes at or above the score threshold can win or suppress.
        candidates = order[scores >= threshold].tolist()
        boxes = [
            [allBoxes[k, 0], allBoxes[k, 1], allBoxes[k, 2], allBoxes[k, 3]]
            for k in candidates
        ]

        # Explicit flags instead of zeroing the score, so suppression also
        # holds when threshold <= 0.
        suppressed = [False] * len(boxes)

        for i, boxA in enumerate(boxes):
            if suppressed[i]:
                continue

            for j in range(i + 1, len(boxes)):
                if suppressed[j]:
                    continue
                if bboxIOU(boxA, boxes[j]) > iouThreshold:
                    suppressed[j] = True

            # A box that reaches this point can no longer be suppressed,
            # because only later (lower-scored) boxes are ever flagged.
            winBoxes.append(boxA)

    return winBoxes

posted on 2024-08-09 20:55 风起- 阅读(48) 评论(0) 收藏举报

刷新页面返回顶部

SSD训练细节

公告