【机器学习】FRCNN rpn部分

代码： https://github.com/kbardool/keras-frcnn

rpn部分:

  1 def calc_rpn(C, img_data, width, height, resized_width, resized_height, img_length_calc_function):
  2 #C-配置信息；img_data-图片路径、bbox坐标、对应分类（一张图片可能有多个bbox）
  3 #width, height-图片原尺寸；resized_width, resized_height-图片resize后的尺寸
  4 #img_length_calc_function-一个方法，基于设置，从图片尺寸计算出网络之后的特征图尺寸
  5 
  6 
  7    downscale = float(C.rpn_stride)
  8    #下采样倍数
  9    anchor_sizes = C.anchor_box_scales
 10    anchor_ratios = C.anchor_box_ratios
 11    num_anchors = len(anchor_sizes) * len(anchor_ratios)
 12    #anchor的大小尺寸（3*3=9）
 13 
 14 
 15    # calculate the output map size based on the network architecture
 16    (output_width, output_height) = img_length_calc_function(resized_width, resized_height)
 17    #计算出特征图尺寸
 18 
 19 
 20    n_anchratios = len(anchor_ratios)
 21    #n_anchratios-anchor的尺寸个数
 22 
 23 
 24    # initialise empty output objectives
 25    #以0初始化配置、参数
 26    y_rpn_overlap = np.zeros((output_height, output_width, num_anchors))
 27    y_is_box_valid = np.zeros((output_height, output_width, num_anchors))
 28    y_rpn_regr = np.zeros((output_height, output_width, num_anchors * 4))
 29 
 30 
 31    num_bboxes = len(img_data['bboxes'])
 32 
 33 
 34    num_anchors_for_bbox = np.zeros(num_bboxes).astype(int)
 35    best_anchor_for_bbox = -1*np.ones((num_bboxes, 4)).astype(int)
 36    best_iou_for_bbox = np.zeros(num_bboxes).astype(np.float32)
 37    best_x_for_bbox = np.zeros((num_bboxes, 4)).astype(int)
 38    best_dx_for_bbox = np.zeros((num_bboxes, 4)).astype(np.float32)
 39 
 40 
 41    #将bbox的坐标分别通过缩放匹配到resize之后的特征图，记作gta，尺寸为（num_bboxes, 4）
 42    # get the GT box coordinates, and resize to account for image resizing
 43    gta = np.zeros((num_bboxes, 4))
 44    for bbox_num, bbox in enumerate(img_data['bboxes']):
 45       # get the GT box coordinates, and resize to account for image resizing
 46       gta[bbox_num, 0] = bbox['x1'] * (resized_width / float(width))
 47       gta[bbox_num, 1] = bbox['x2'] * (resized_width / float(width))
 48       gta[bbox_num, 2] = bbox['y1'] * (resized_height / float(height))
 49       gta[bbox_num, 3] = bbox['y2'] * (resized_height / float(height))
 50    
 51    # rpn ground truth
 52    for anchor_size_idx in range(len(anchor_sizes)):
 53       for anchor_ratio_idx in range(n_anchratios):
 54          anchor_x = anchor_sizes[anchor_size_idx] * anchor_ratios[anchor_ratio_idx][0]
 55          anchor_y = anchor_sizes[anchor_size_idx] * anchor_ratios[anchor_ratio_idx][1]  
 56          #计算ancohr的长宽。
 57          # #anchor_ratios[anchor_ratio_idx]-size为idx的长宽
 58 
 59 
 60          for ix in range(output_width):             
 61             # x-coordinates of the current anchor box  
 62             x1_anc = downscale * (ix + 0.5) - anchor_x / 2
 63             x2_anc = downscale * (ix + 0.5) + anchor_x / 2
 64             
 65             # ignore boxes that go across image boundaries             
 66             if x1_anc < 0 or x2_anc > resized_width:
 67                continue
 68                
 69             for jy in range(output_height):
 70 
 71 
 72                # y-coordinates of the current anchor box
 73                y1_anc = downscale * (jy + 0.5) - anchor_y / 2
 74                y2_anc = downscale * (jy + 0.5) + anchor_y / 2
 75 
 76 
 77                # ignore boxes that go across image boundaries
 78                if y1_anc < 0 or y2_anc > resized_height:
 79                   continue
 80                #-------------------------------------------------------------#
 81                #将特征图每一点都作为锚点，通过downscale映射到图片的实际尺寸，再结合anchor的尺寸
 82                #忽略超出图片范围的anchor
 83                #得到大小、比例不一的多个锚框
 84 
 85 
 86                #定义变量
 87                # bbox_type indicates whether an anchor should be a target
 88                #初始化bbox的类型为负样本
 89                bbox_type = 'neg'
 90 
 91 
 92                # this is the best IOU for the (x,y) coord and the current anchor
 93                # note that this is different from the best IOU for a GT bbox
 94                best_iou_for_loc = 0.0
 95 
 96 
 97                #对锚框进行遍历：
 98                for bbox_num in range(num_bboxes):
 99                   
100                   # get IOU of the current GT box and the current anchor box
101                   #遍历gta里的bbox，与anchor求iou
102                   curr_iou = iou([gta[bbox_num, 0], gta[bbox_num, 2], gta[bbox_num, 1], gta[bbox_num, 3]], [x1_anc, y1_anc, x2_anc, y2_anc])
103                   # calculate the regression targets if they will be needed
104                   #若iou大于阈值或当前gta最大值
105                   if curr_iou > best_iou_for_bbox[bbox_num] or curr_iou > C.rpn_max_overlap:
106                      #计算gta和anchor的中心点坐标
107                      #通过中心点坐标和bbox坐标，计算出x,y,w,h四个值的梯度值，用于后面的回归计算
108                      cx = (gta[bbox_num, 0] + gta[bbox_num, 1]) / 2.0
109                      cy = (gta[bbox_num, 2] + gta[bbox_num, 3]) / 2.0
110                      cxa = (x1_anc + x2_anc)/2.0
111                      cya = (y1_anc + y2_anc)/2.0
112 
113 
114                      tx = (cx - cxa) / (x2_anc - x1_anc)
115                      ty = (cy - cya) / (y2_anc - y1_anc)
116                      tw = np.log((gta[bbox_num, 1] - gta[bbox_num, 0]) / (x2_anc - x1_anc))
117                      th = np.log((gta[bbox_num, 3] - gta[bbox_num, 2]) / (y2_anc - y1_anc))
118 
119 
120                   #对正样本进行操作
121                   if img_data['bboxes'][bbox_num]['class'] != 'bg':
122 
123 
124                      #所有对象都有anchor对应，所以追踪anchor就找到了对象
125                      # all GT boxes should be mapped to an anchor box, so we keep track of which anchor box was best
126                      # 如果iou>当前gta的最大值，更新当前gta的最匹配anchor，最大交集，最优region坐标，最优梯度
127                      if curr_iou > best_iou_for_bbox[bbox_num]:
128                         best_anchor_for_bbox[bbox_num] = [jy, ix, anchor_ratio_idx, anchor_size_idx]
129                         best_iou_for_bbox[bbox_num] = curr_iou
130                         best_x_for_bbox[bbox_num,:] = [x1_anc, x2_anc, y1_anc, y2_anc]
131                         best_dx_for_bbox[bbox_num,:] = [tx, ty, tw, th]
132 
133 
134                      # we set the anchor to positive if the IOU is >0.7 (it does not matter if there was another better box, it just indicates overlap)
135                      #如果iou>0.7，bbox_type设为pos-正样本（默认是neg）
136                      if curr_iou > C.rpn_max_overlap:
137                         bbox_type = 'pos'
138                         #当前bbox的匹配anchor数+1
139                         num_anchors_for_bbox[bbox_num] += 1
140 
141 
142                         #如果iou>当前best_iou_for_loc，则将best_regr设为当前的tx,ty,tw,th
143                         # we update the regression layer target if this IOU is the best for the current (x,y) and anchor position
144                         if curr_iou > best_iou_for_loc:
145                            best_iou_for_loc = curr_iou
146                            best_regr = (tx, ty, tw, th)
147 
148 
149                      #如果iou介于最大和最小阈值之间，设定bbox_type为neutral（中立）
150                      # if the IOU is >0.3 and <0.7, it is ambiguous and no included in the objective
151                      if C.rpn_min_overlap < curr_iou < C.rpn_max_overlap:
152                         # gray zone between neg and pos
153                         if bbox_type != 'pos':
154                            bbox_type = 'neutral'
155 
156 
157 
158 
159                # turn on or off outputs depending on IOUs
160                #根据type区分bbox
161                if bbox_type == 'neg':
162                   #负样本
163                   y_is_box_valid[jy, ix, anchor_ratio_idx + n_anchratios * anchor_size_idx] = 1
164                   #有意义
165                   y_rpn_overlap[jy, ix, anchor_ratio_idx + n_anchratios * anchor_size_idx] = 0
166                   #不包含对象
167                elif bbox_type == 'neutral':
168                   #中立
169                   y_is_box_valid[jy, ix, anchor_ratio_idx + n_anchratios * anchor_size_idx] = 0
170                   #无意义，不包含对象
171                   y_rpn_overlap[jy, ix, anchor_ratio_idx + n_anchratios * anchor_size_idx] = 0
172                elif bbox_type == 'pos':
173                   #正样本
174                   y_is_box_valid[jy, ix, anchor_ratio_idx + n_anchratios * anchor_size_idx] = 1
175                   y_rpn_overlap[jy, ix, anchor_ratio_idx + n_anchratios * anchor_size_idx] = 1
176                   #有意义，包含对象
177                   #（？？？体悟明咯，咩start啊）
178                   start = 4 * (anchor_ratio_idx + n_anchratios * anchor_size_idx)
179                   y_rpn_regr[jy, ix, start:start+4] = best_regr
180 
181 
182    # we ensure that every bbox has at least one positive RPN region
183    #每个bbox都至少要有一个pos的anchor，如果没有的话在中性anchor中挑最好的
184    for idx in range(num_anchors_for_bbox.shape[0]):
185       #如果没有bbox没有pos的anchor
186       if num_anchors_for_bbox[idx] == 0:
187          # no box with an IOU greater than zero ...
188          if best_anchor_for_bbox[idx, 0] == -1:
189             continue
190          y_is_box_valid[
191             best_anchor_for_bbox[idx,0], best_anchor_for_bbox[idx,1], best_anchor_for_bbox[idx,2] + n_anchratios *
192             best_anchor_for_bbox[idx,3]] = 1
193          y_rpn_overlap[
194             best_anchor_for_bbox[idx,0], best_anchor_for_bbox[idx,1], best_anchor_for_bbox[idx,2] + n_anchratios *
195             best_anchor_for_bbox[idx,3]] = 1
196          #(好似每个pos都有start、y_rpn_regr喔)
197          start = 4 * (best_anchor_for_bbox[idx,2] + n_anchratios * best_anchor_for_bbox[idx,3])
198          y_rpn_regr[
199             best_anchor_for_bbox[idx,0], best_anchor_for_bbox[idx,1], start:start+4] = best_dx_for_bbox[idx, :]
200 
201 
202    #用numpy大法进行anchor的回归
203    y_rpn_overlap = np.transpose(y_rpn_overlap, (2, 0, 1))
204    y_rpn_overlap = np.expand_dims(y_rpn_overlap, axis=0)
205 
206 
207    y_is_box_valid = np.transpose(y_is_box_valid, (2, 0, 1))
208    y_is_box_valid = np.expand_dims(y_is_box_valid, axis=0)
209 
210 
211    y_rpn_regr = np.transpose(y_rpn_regr, (2, 0, 1))
212    y_rpn_regr = np.expand_dims(y_rpn_regr, axis=0)
213 
214 
215    pos_locs = np.where(np.logical_and(y_rpn_overlap[0, :, :, :] == 1, y_is_box_valid[0, :, :, :] == 1))
216    neg_locs = np.where(np.logical_and(y_rpn_overlap[0, :, :, :] == 0, y_is_box_valid[0, :, :, :] == 1))
217 
218 
219    num_pos = len(pos_locs[0])
220 
221 
222    # one issue is that the RPN has many more negative than positive regions, so we turn off some of the negative
223    # regions. We also limit it to 256 regions.
224    #设定regions数量的最大值，并对正负样本均匀取样
225    num_regions = 256
226 
227 
228    if len(pos_locs[0]) > num_regions/2:
229       val_locs = random.sample(range(len(pos_locs[0])), len(pos_locs[0]) - num_regions/2)
230       y_is_box_valid[0, pos_locs[0][val_locs], pos_locs[1][val_locs], pos_locs[2][val_locs]] = 0
231       num_pos = num_regions/2
232 
233 
234    if len(neg_locs[0]) + num_pos > num_regions:
235       val_locs = random.sample(range(len(neg_locs[0])), len(neg_locs[0]) - num_pos)
236       y_is_box_valid[0, neg_locs[0][val_locs], neg_locs[1][val_locs], neg_locs[2][val_locs]] = 0
237 
238 
239    #得到二分类和回归的结果
240    y_rpn_cls = np.concatenate([y_is_box_valid, y_rpn_overlap], axis=1)
241    y_rpn_regr = np.concatenate([np.repeat(y_rpn_overlap, 4, axis=1), y_rpn_regr], axis=1)
242 
243 
244    return np.copy(y_rpn_cls), np.copy(y_rpn_regr)

rpn部分整合：

 1 def rpn(base_layers,num_anchors):
 2 
 3 
 4     x = Convolution2D(512, (3, 3), padding='same', activation='relu', kernel_initializer='normal', name='rpn_conv1')(base_layers)
 5 
 6 
 7     # 通过1*1的卷积生成num_anchors数量的channel，每个channel包含特征图（w*h）个relu激活值，表明anchor是否有意义
 8     #number_anchors=9（9种anchor）
 9     x_class = Convolution2D(num_anchors, (1, 1), activation='sigmoid', kernel_initializer='uniform', name='rpn_out_class')(x)
10     x_regr = Convolution2D(num_anchors * 4, (1, 1), activation='linear', kernel_initializer='zero', name='rpn_out_regress')(x)
11     #x_class是原大小，深度为9（9种anchor的前/背景分类）
12     #x_regr是原大小，深度为9*4（9种anchor的坐标）
13     return [x_class, x_regr, base_layers]

posted on 2022-08-02 00:31 Jolyne123 阅读(115) 评论(0) 收藏举报