mmdet阅读笔记

不清楚可以点击查看

mmdet

后续陆续增加源码注释

-- mmdetection.configs
注意： _base_里面的文件都是基础的配置，后面的配置文件调用之后可以修改，以后面的为准
configs/_base_/datasets: 基础数据的配置文件
configs/_base_/models: 基础模型的配置文件
configs/_base_/schedules: 基础超参数的配置文件
configs/_base_/default_runtime.py: 基础运行时配置文件，包括：模型保存间隔、dist后端配置等
configs/others: 上层配置文件，调用_base_里面的配置，然后针对不同模型不同情况重新封装，实际调用以这个配置参数为准，基础只是通用配置。
-- mmdetection.demo
/demo/all: 主要是前向计算测试文件
-- mmdetection.mmdet
/mmdet/apis: 训练和前向计算实例化
/mmdet/core: anchor和bbox等操作具体实现，并被包裹到registry

/mmdet/core/anchor/anchor_generator.py: anchor构建


# 此类是传统anchor生成方法，比如RCNN
class AnchorGenerator(object):
    """Outline of the conventional anchor generator (e.g. as used by R-CNN).

    These notes keep only the method signatures; every body is a
    placeholder, so each method currently returns ``None``.
    """

    def __init__(self,
                 strides,
                 ratios,
                 scales=None,
                 base_sizes=None,
                 scale_major=True,
                 octave_base_scale=None,
                 scales_per_octave=None,
                 centers=None,
                 center_offset=0.):
        """Placeholder constructor.

        Args:
            strides: Step of the anchors on the feature map; also the ratio
                between the input image and the feature map.
            ratios: Height/width aspect ratios of the anchors.
            scales: Anchor scales; plays a role similar to ``base_sizes``
                and ``strides``.
            base_sizes: Base size of the anchors; when ``None`` the values
                of ``strides`` are used instead.
            scale_major, octave_base_scale, scales_per_octave, centers,
            center_offset: Not described in these notes.
        """
        pass

    def num_base_anchors(self):
        """Number of base anchors per level, e.g. SSD300 -> [4, 6, 6, 6, 4, 4]."""
        pass

    def num_levels(self):
        """Number of feature levels, e.g. SSD300 -> 6."""
        pass

    def gen_base_anchors(self):
        """Compute the base anchors.

        These are all anchors located at the top-left point of every level
        (6 levels for SSD300).
        """
        pass

    def gen_single_level_base_anchors(self,
                                      base_size,
                                      scales,
                                      ratios,
                                      center=None):
        """Placeholder for single-level base-anchor generation.

        NOTE(review): the original note attached here says "given the
        feature size and stride, compute the full anchor layout by shifting
        the single-point anchors from ``gen_base_anchors`` across the whole
        feature map, with ``strides`` as the shift step". That sounds like
        the grid-anchor step rather than this method -- confirm which method
        the note was meant for.
        """
        # The original outline had no body here, which is a syntax error.
        pass

    def _meshgrid(self, x, y, row_major=True):
        """Like numpy's meshgrid: build the xy coordinates of a 2-D feature map."""
        pass

    def valid_flags(self, featmap_sizes, pad_shape, device='cuda'):
        """Validity flags for anchors.

        Because of padding some anchors are not meaningful; valid anchors
        are flagged 1 and invalid ones 0.
        """
        pass

/mmdet/core/bbox/assigners: 正负样本分配
/mmdet/core/bbox/assigners/max_iou_assigner.py: 使用最大IOU进行正负样本分配

class MaxIoUAssigner(BaseAssigner):
    """Assign each box (anchor) to a ground-truth box by maximum IoU.

    The per-box result ``assigned_gt_inds`` uses this encoding:

    * ``-1``: left unassigned (neither positive nor negative),
    * ``0``: negative sample (background),
    * ``k > 0``: positive sample matched to GT box ``k - 1``.

    The methods below read ``gpu_assign_thr``, ``iou_calculator``,
    ``ignore_iof_thr``, ``ignore_wrt_candidates``, ``neg_iou_thr``,
    ``pos_iou_thr``, ``match_low_quality``, ``min_pos_iou`` and
    ``gt_max_assign_all`` from ``self``; the constructor that sets them is
    not part of this excerpt.
    """

    def assign(self, bboxes, gt_bboxes, gt_bboxes_ignore=None, gt_labels=None):
        """Compute IoUs and assign a GT index (and label) to every box.

        Args:
            bboxes: Boxes to be assigned, i.e. the anchors (SSD300 produces
                8732 of them).
            gt_bboxes: Ground-truth boxes, shape (N, 4).
            gt_bboxes_ignore: Boxes to ignore; some COCO annotations contain
                very small or otherwise abnormal objects.
            gt_labels: Labels of ``gt_bboxes``. The label lookup in
                ``assign_wrt_overlaps`` writes into a 1-D tensor, so this is
                expected to be 1-D with N entries.

        Returns:
            AssignResult: The assignment, moved back to the original device
            of ``bboxes`` when the computation was done on CPU.
        """
        # With very many GT boxes in one image, do the assignment on CPU.
        assign_on_cpu = (self.gpu_assign_thr > 0) and (
            gt_bboxes.shape[0] > self.gpu_assign_thr)
        if assign_on_cpu:
            device = bboxes.device
            bboxes = bboxes.cpu()
            gt_bboxes = gt_bboxes.cpu()
            if gt_bboxes_ignore is not None:
                gt_bboxes_ignore = gt_bboxes_ignore.cpu()
            if gt_labels is not None:
                gt_labels = gt_labels.cpu()

        # IoU of every GT against every box, shape (num_gts, num_bboxes).
        # Runs on whichever device the tensors are on now (CPU if they were
        # moved above).
        overlaps = self.iou_calculator(gt_bboxes, bboxes)

        if (self.ignore_iof_thr > 0 and gt_bboxes_ignore is not None
                and gt_bboxes_ignore.numel() > 0 and bboxes.numel() > 0):
            if self.ignore_wrt_candidates:
                ignore_overlaps = self.iou_calculator(
                    bboxes, gt_bboxes_ignore, mode='iof')
                ignore_max_overlaps, _ = ignore_overlaps.max(dim=1)
            else:
                ignore_overlaps = self.iou_calculator(
                    gt_bboxes_ignore, bboxes, mode='iof')
                ignore_max_overlaps, _ = ignore_overlaps.max(dim=0)
            # Boxes that overlap an ignored region too much get IoU -1 for
            # every GT, so they match neither the negative nor the positive
            # thresholds below.
            overlaps[:, ignore_max_overlaps > self.ignore_iof_thr] = -1

        assign_result = self.assign_wrt_overlaps(overlaps, gt_labels)
        if assign_on_cpu:
            # Move the results back to the device the inputs came from.
            assign_result.gt_inds = assign_result.gt_inds.to(device)
            assign_result.max_overlaps = assign_result.max_overlaps.to(device)
            if assign_result.labels is not None:
                assign_result.labels = assign_result.labels.to(device)
        return assign_result

    def assign_wrt_overlaps(self, overlaps, gt_labels=None):
        """Turn an IoU matrix into an assignment.

        Args:
            overlaps: IoU matrix of shape (num_gts, num_bboxes).
            gt_labels: Optional 1-D labels of the GT boxes.

        Returns:
            AssignResult: Holds ``num_gts``, the per-box GT indices, the
            per-box maximum IoU and (if ``gt_labels`` is given) the per-box
            labels, with ``-1`` for boxes that are not positive.
        """
        num_gts, num_bboxes = overlaps.size(0), overlaps.size(1)

        # One image is processed per call, so a vector with one entry per
        # box (8732 for SSD300) is enough to flag every anchor.
        # 1. initialise everything to -1
        assigned_gt_inds = overlaps.new_full((num_bboxes, ),
                                             -1,
                                             dtype=torch.long)
        # No targets (or no boxes); this normally does not happen.
        if num_gts == 0 or num_bboxes == 0:
            # No ground truth or boxes, return empty assignment
            max_overlaps = overlaps.new_zeros((num_bboxes, ))
            if num_gts == 0:
                # No truth, assign everything to background
                assigned_gt_inds[:] = 0
            if gt_labels is None:
                assigned_labels = None
            else:
                assigned_labels = overlaps.new_full((num_bboxes, ),
                                                    -1,
                                                    dtype=torch.long)
            return AssignResult(
                num_gts,
                assigned_gt_inds,
                max_overlaps,
                labels=assigned_labels)

        # For each anchor: the best IoU over all GTs and which GT gives it.
        # Both results have one entry per box.
        max_overlaps, argmax_overlaps = overlaps.max(dim=0)
        # For each GT: the best IoU over all anchors and which anchor gives
        # it. Both results have one entry per GT.
        gt_max_overlaps, gt_argmax_overlaps = overlaps.max(dim=1)

        # 2. assign negatives: boxes whose best IoU is below the threshold
        #    get index 0
        if isinstance(self.neg_iou_thr, float):
            # negative IoU range: [0, neg_iou_thr)
            assigned_gt_inds[(max_overlaps >= 0)
                             & (max_overlaps < self.neg_iou_thr)] = 0
        elif isinstance(self.neg_iou_thr, tuple):
            assert len(self.neg_iou_thr) == 2
            # negative IoU range: [neg_iou_thr[0], neg_iou_thr[1])
            assigned_gt_inds[(max_overlaps >= self.neg_iou_thr[0])
                             & (max_overlaps < self.neg_iou_thr[1])] = 0

        # 3. assign positives: at or above the positive IoU threshold.
        #    The stored value is the GT index plus one, so that GT 0 does
        #    not collide with the negative marker 0.
        pos_inds = max_overlaps >= self.pos_iou_thr
        assigned_gt_inds[pos_inds] = argmax_overlaps[pos_inds] + 1

        # Guards against targets that lie very close together (or a poor
        # anchor layout). Example: targets B and C, anchors a_f and a_g with
        # IoUs a_f -> (B 80%, C 79%) and a_g -> (B 81%, C 80%). C never has
        # the larger IoU, so step 3 gives both anchors to B and C would have
        # nothing to regress from. The step below does not guarantee that
        # every target receives an anchor, but it avoids that situation.
        if self.match_low_quality:
            # Low-quality matching will overwrite the assigned_gt_inds
            # assigned in Step 3. Thus, the assigned gt might not be the
            # best one for prediction.
            # For example, if bbox A has 0.9 and 0.8 iou with GT bbox 1 & 2,
            # GT bbox 1 will be assigned as the best target for bbox A in
            # step 3. However, if GT bbox 2's gt_argmax_overlaps = A, bbox
            # A's assigned_gt_inds will be overwritten to be GT bbox 2.
            # This might be the reason that it is not used in ROI Heads.
            for i in range(num_gts):
                if gt_max_overlaps[i] >= self.min_pos_iou:
                    if self.gt_max_assign_all:
                        # Every anchor that ties for this GT's best IoU.
                        max_iou_inds = overlaps[i, :] == gt_max_overlaps[i]
                        assigned_gt_inds[max_iou_inds] = i + 1
                    else:
                        assigned_gt_inds[gt_argmax_overlaps[i]] = i + 1

        if gt_labels is not None:
            # -1 marks boxes without a label (everything that is not positive).
            assigned_labels = assigned_gt_inds.new_full((num_bboxes, ), -1)
            pos_inds = torch.nonzero(
                assigned_gt_inds > 0, as_tuple=False).squeeze()
            if pos_inds.numel() > 0:
                # Undo the +1 offset to index into gt_labels.
                assigned_labels[pos_inds] = gt_labels[
                    assigned_gt_inds[pos_inds] - 1]
        else:
            assigned_labels = None

        # Bundle everything into a result object.
        return AssignResult(
            num_gts, assigned_gt_inds, max_overlaps, labels=assigned_labels)

/mmdet/core/bbox/coder: 编码anchor和output
/mmdet/core/bbox/coder/delta_xywh_bbox_coder.py: SSD方式编码
/mmdet/core/bbox/iou_calculators: IOU具体操作在这里实现
/mmdet/core/bbox/samplers: 正负样本采样，针对样本不均衡等问题，SSD使用负样本挖掘方式
/mmdet/core/bbox/samplers/sampling_result.py: 和上面的assign_result类似，都是将数据写入一个类中，方便管理和操作

/mmdet/datasets: 数据读取处理函数

/datasets/pipelines: 数据增强具体实现和Compose
/datasets/samplers:
-- distributed_sampler.py: 重写了distributed_sampler类，和torch原版一点没变，仅仅改了名字。
-- group_sampler.py:

class GroupSampler(Sampler):
    """Sampler that keeps every per-GPU batch inside a single group.

    Samples are grouped by ``dataset.flag``. Each group is padded (by
    re-drawing indices from the same group) to a multiple of
    ``samples_per_gpu``, so consecutive chunks of ``samples_per_gpu``
    indices always come from one group.

    Args:
        dataset: Object with a ``flag`` array holding one non-negative
            integer group id per sample (the notes say it is defined in
            datasets/custom.py).
        samples_per_gpu: Number of samples in each per-GPU batch (not the
            number of GPUs).
    """

    def __init__(self, dataset, samples_per_gpu=3):
        assert hasattr(dataset, 'flag')
        self.dataset = dataset
        self.samples_per_gpu = samples_per_gpu
        self.flag = dataset.flag.astype(np.int64)
        # group_sizes[g] is the number of samples whose flag equals g.
        self.group_sizes = np.bincount(self.flag)
        self.num_samples = 0
        for size in self.group_sizes:
            # Round each group up to a whole number of batches.
            self.num_samples += int(np.ceil(
                size / self.samples_per_gpu)) * self.samples_per_gpu

    def __iter__(self):
        """Yield ``num_samples`` indices, batch-aligned by group."""
        indices = []
        for i, size in enumerate(self.group_sizes):
            if size == 0:
                continue
            indice = np.where(self.flag == i)[0]
            assert len(indice) == size
            np.random.shuffle(indice)  # random order within the group
            # How many indices are missing to fill the last batch.
            num_extra = int(np.ceil(size / self.samples_per_gpu)
                            ) * self.samples_per_gpu - len(indice)
            # Fill the gap with indices drawn at random from the same group.
            indice = np.concatenate(
                [indice, np.random.choice(indice, num_extra)])
            indices.append(indice)
        if not indices:
            # Every group is empty; np.concatenate would raise on [].
            return iter([])
        indices = np.concatenate(indices)
        # Shuffle the order of the samples_per_gpu-sized chunks; the
        # contents of each chunk are left untouched.
        indices = [
            indices[i * self.samples_per_gpu:(i + 1) * self.samples_per_gpu]
            for i in np.random.permutation(
                range(len(indices) // self.samples_per_gpu))
        ]
        indices = np.concatenate(indices)
        indices = indices.astype(np.int64).tolist()
        assert len(indices) == self.num_samples
        return iter(indices)

    def __len__(self):
        """Number of indices produced by one pass, padding included."""
        return self.num_samples

/torch/utils/data/dataset:

class ConcatDataset(Dataset):
    """Dataset formed by concatenating several datasets end to end.

    ``cumulative_sizes`` holds the running totals of the sub-dataset
    lengths, e.g. ``[len_1, len_1 + len_2, len_1 + len_2 + len_3]``.
    """

    @staticmethod
    def cumsum(sequence):
        """Return the running totals of ``len(item)`` over ``sequence``."""
        totals, running = [], 0
        for item in sequence:
            running += len(item)
            totals.append(running)
        return totals

    def __init__(self, datasets):
        """Store the sub-datasets and precompute their cumulative lengths.

        Raises:
            ValueError: If ``datasets`` is empty.
        """
        super().__init__()
        # Must be set before cumulative_sizes is computed from it.
        self.datasets = list(datasets)
        if not self.datasets:
            raise ValueError("datasets should not be an empty iterable")
        self.cumulative_sizes = self.cumsum(self.datasets)

    def __len__(self):
        # The last running total is the overall length.
        return self.cumulative_sizes[-1]

    def __getitem__(self, idx):
        """Return the sample at global position ``idx``.

        Negative indices count from the end.

        Raises:
            ValueError: If a negative ``idx`` reaches before the first sample.
        """
        # Negative (reverse) indexing.
        if idx < 0:
            if -idx > len(self):
                raise ValueError("absolute value of index should not exceed dataset length")
            idx = len(self) + idx
        # Binary search for the sub-dataset: the first running total that
        # is strictly greater than idx.
        dataset_idx = bisect.bisect_right(self.cumulative_sizes, idx)
        if dataset_idx == 0:
            sample_idx = idx
        else:
            # Subtract the samples held by all earlier sub-datasets.
            sample_idx = idx - self.cumulative_sizes[dataset_idx - 1]
        return self.datasets[dataset_idx][sample_idx]

    @property
    def cummulative_sizes(self):
        """Deprecated misspelled alias of ``cumulative_sizes``."""
        warnings.warn("cummulative_sizes attribute is renamed to "
                      "cumulative_sizes", DeprecationWarning, stacklevel=2)
        return self.cumulative_sizes

/datasets/builder: 实例化数据相关任务:sample、dataloader、dataset
/datasets/dataset_wrappers.py: 重写concatDataset、RepeatDataset上面已经详细说明，增加数据类别平衡类（具体没看）
/datasets/custom.py:

@DATASETS.register_module()
class CustomDataset(Dataset):
    """Outline of the custom detection dataset.

    These notes keep only the method signatures; every body is a
    placeholder.
    """

    # Class names. Usually defined directly on the class, but can also be
    # passed in from outside.
    CLASSES = None

    def load_annotations(self, ann_file):
        """Read all annotations.

        Each entry has the following format::

            {
                'filename': 'a.jpg',
                'width': 1280,
                'height': 720,
                'ann':
                    {
                        'bboxes': <np.ndarray> (n, 4),
                        'labels': <np.ndarray> (n, ),
                        'bboxes_ignore': <np.ndarray> (k, 4), (optional field)
                        'labels_ignore': <np.ndarray> (k, 4) (optional field)
                    }
            },

        NOTE(review): the shape given for 'labels_ignore' is copied from the
        original note; by analogy with 'labels' it is presumably (k, ) --
        confirm.
        """
        pass

    def load_proposals(self, proposal_file):
        """Purpose not yet determined by the author of these notes."""
        pass

    def _filter_imgs(self, min_size=32):
        """Filter out samples that do not meet the requirements."""
        pass

    def prepare_train_img(self, idx):
        """Fetch a single training sample."""
        pass

    def prepare_test_img(self, idx):
        """Fetch a single test sample."""
        # The original outline had no body here, which is a syntax error.
        pass

    def get_ann_info(self, idx):
        """Get the annotation info of a single image."""
        pass

    def _rand_another(self, idx):
        """Pick another sample at random; relies on ``_set_group_flag``."""
        pass

    def _set_group_flag(self):
        """Group images by a fixed rule (originally the aspect ratio)."""
        pass

整个数据读取流程比较清晰：

graph TD
    A_1[准备特定格式label] --> A_2
    A_2[读取全部label] --> A_3(过滤不合适label)
    A_3 --> C{train/test}
    C -->|train| D[读取图像信息+label信息]
    C -->|test| E[和train类似]
    D --> D_1{合适/不合适}
    D_1 -->|不合适| D_2(随机选取)
    D_1 -->|合适| D_3(直接选取)

/mmdet/models: 模型实际实现函数
/mmdet/ops: 需要快速实现的操作，如：NMS、ROIPooling、ROIAlign....
/mmdet/utils: 一些辅助操作，环境变量和版本等
-- mmdetection.tests
/tests/all: 测试脚本，可以用来查看原理和测试
-- mmdetection.tools
/tools/all: 杂七杂八文件，包括：训练+测试（仅是入口，实际操作在apis之内），数据转换、计算MAC、转换模型ONNX.....
/tools/train.py: 单机单卡
/tools/dist_train.sh: 单机多卡，使用distributed
/tools/slurm_train.sh: 多机多卡

大致流程:

准备数据集，在mmdet/datasets
准备模型，在mmdet/models, loss函数在models里面实现
准备特殊函数，在/mmdet/core，一些mmdet没有的操作
配置参数，在/configs, 基础配置可选，后面的参数必须配置
训练模型，在/tools（仅是入口，实际操作在/mmdet/apis）, 调用评估可在配置里设置
前向推理，在/demo

Already open...

...

![](https://img2020.cnblogs.com/blog/1033571/202007/1033571-20200727215053605-374644037.png)

posted on 2020-07-08 17:51 影醉阏轩窗阅读(3421) 评论(1) 收藏举报

刷新页面返回顶部

寂寞的小乞丐

mmdet阅读笔记

mmdet

导航

公告