1 # -*- coding: utf-8 -*
2
3 '''
4 Targets:
1. Run the YOLO object-detection algorithm on a car-detection dataset
2. Process the resulting bounding boxes
7
8 Datasets: Drive.ai https://www.drive.ai/
9 input: (m,608,608,3)
10 output:(pc, bx,by,bh,bw,c)
11
12 Anchor nums: 5
13
14 Architecture:
15 IMAGE(m,608,608,3)->DEEP CNN-> ENCODING(m,19,19,5,85)/(m,19,19,425)
16
YOLO: the "You Only Look Once" algorithm
18 '''
19 import sys
20 import importlib
21 importlib.reload(sys)
22
23 import argparse
24 import imghdr
25 # import sys
26 import os
27 import matplotlib.pyplot as plt
28 from matplotlib.pyplot import imshow
29 import scipy.io
30 import scipy.misc
31 import numpy as np
32 import pandas as pd
33 import PIL
34 import tensorflow as tf
35 from keras import backend as K
36 #import kera's backend as K. use a Keras function in this notebook, just write K.function()
37 from keras.layers import Input, Lambda, Conv2D
38 from keras.models import load_model, Model
39 from yolo_utils import read_classes, read_anchors, generate_colors, preprocess_image, draw_boxes, scale_boxes
40 from yad2k.models.keras_yolo import yolo_head, \
41 yolo_boxes_to_corners, preprocess_true_boxes, yolo_loss, yolo_body
42
43 # reload(sys)
44 # sys.setdefaultencoding('utf8')
45
46
# Score-threshold filtering: discard the boxes that are not confident enough.
def yolo_filter_boxes(box_confidence, boxes, box_class_probs, threshold=0.6):
    """Keep only the boxes whose best class score reaches ``threshold``.

    Arguments:
    box_confidence -- tensor holding the objectness p_c of every box;
                      expected shape (19x19, 5, 1)
    boxes -- tensor holding the 4 coordinates of every box;
             expected shape (19x19, 5, 4)
    box_class_probs -- tensor holding the class probabilities (c1 ... c80)
                       of every box; expected shape (19x19, 5, 80)
    threshold -- real value; a box is dropped when its highest class score
                 is below this value

    Returns:
    scores -- tensor of shape (None,), best class score of each kept box
    boxes -- tensor of shape (None, 4), coordinates of each kept box
    classes -- tensor of shape (None,), index of the best class of each
               kept box

    "None" appears because the number of kept boxes is only known at run
    time: it depends on the threshold.
    """
    # Score of every class for every box: objectness times class probability.
    per_class_scores = box_confidence * box_class_probs

    # Reduce over the last (class) axis: winning class index and its score.
    best_class = K.argmax(per_class_scores, axis=-1)
    best_score = K.max(per_class_scores, axis=-1)

    # Boolean mask that is True for the boxes worth keeping.
    keep_mask = best_score >= threshold

    def _keep(tensor):
        # Select the entries of ``tensor`` whose box passed the threshold.
        return tf.boolean_mask(tensor, keep_mask)

    return _keep(best_score), _keep(boxes), _keep(best_class)
88
# Intersection over Union (IoU): the overlap measure that non-max
# suppression is based on.
def iou(box1, box2):
    """Return the Intersection over Union (IoU) of two axis-aligned boxes.

    Arguments:
    box1 -- first box, sequence of corner coordinates (x1, y1, x2, y2)
    box2 -- second box, same layout as box1

    Returns:
    The intersection area divided by the union area. Boxes that do not
    overlap (or only touch) yield 0.

    Note: two boxes that both have zero area make the union zero; that
    degenerate case is not special-cased and the division is left as is.
    """
    # Corners of the candidate intersection rectangle.
    xi1 = np.maximum(box1[0], box2[0])
    yi1 = np.maximum(box1[1], box2[1])
    xi2 = np.minimum(box1[2], box2[2])
    yi2 = np.minimum(box1[3], box2[3])

    # Clamp each side at zero: for disjoint boxes the raw differences are
    # negative, and multiplying two negative sides would otherwise report a
    # bogus positive intersection.
    inter_width = np.maximum(xi2 - xi1, 0)
    inter_height = np.maximum(yi2 - yi1, 0)
    inter_area = inter_width * inter_height

    # Union = sum of both areas minus the part counted twice.
    box1_area = (box1[2] - box1[0]) * (box1[3] - box1[1])
    box2_area = (box2[2] - box2[0]) * (box2[3] - box2[1])
    union_area = (box1_area + box2_area) - inter_area

    return inter_area / union_area
112
# Non-max suppression (NMS):
# 1. Select the box that has the highest score.
# 2. Compute its overlap with all other boxes and remove the boxes that
#    overlap it more than the IoU threshold.
# 3. Go back to step 1 and iterate until no box with a lower score than the
#    currently selected one is left.
# Tip: two built-in functions are used to implement NMS here:
#   tf.image.non_max_suppression()
#   K.gather()
def yolo_nms(scores, boxes, classes, max_boxes=10, iou_threshold=0.5):
    """Apply non-max suppression to a set of boxes.

    Arguments:
    scores -- tensor of shape (None,), output of yolo_filter_boxes()
    boxes -- tensor of shape (None, 4), output of yolo_filter_boxes()
    classes -- tensor of shape (None,), output of yolo_filter_boxes()
    max_boxes -- integer, maximum number of predicted boxes you'd like
    iou_threshold -- real value, IoU threshold used for NMS filtering

    Returns:
    scores -- tensor of shape (None,), score of each box that was kept
    boxes -- tensor of shape (None, 4), coordinates of each box that was kept
    classes -- tensor of shape (None,), class of each box that was kept

    Note: the "None" dimension of the outputs is at most max_boxes. The
    tensors are only gathered along their first axis; they are not
    transposed.
    """
    # Indices (into `boxes`) of the boxes that survive suppression.
    # `max_boxes` is passed directly as a Python integer, so no backend
    # variable has to be created or initialised for it.
    nms_indices = tf.image.non_max_suppression(
        boxes, scores, max_output_size=max_boxes, iou_threshold=iou_threshold)

    # Keep only the selected entries of each tensor.
    scores = K.gather(scores, nms_indices)
    classes = K.gather(classes, nms_indices)
    boxes = K.gather(boxes, nms_indices)

    return scores, boxes, classes
156
# Wrapping up the filtering:
# the deep CNN output (19, 19, 5, 85) is filtered through all the boxes using
# the functions implemented in this file.

# First tool: scale_boxes.
# In short, the boxes are scaled proportionally to the image resolution.
# NOTE: this definition replaces the scale_boxes imported from yolo_utils.
def scale_boxes(boxes, image_shape):
    """Scale the predicted boxes so they can be drawn on the image.

    Arguments:
    boxes -- tensor of box coordinates with 4 values per box
             (presumably ordered to match height, width, height, width --
             TODO confirm against yolo_boxes_to_corners)
    image_shape -- indexable whose first two items are the image height and
                   width

    Returns:
    boxes multiplied element-wise by (height, width, height, width).
    """
    img_h, img_w = image_shape[0], image_shape[1]
    # K.stack joins the four scalars into one tensor; reshaping it to (1, 4)
    # lets it broadcast against every box.
    scale = K.reshape(K.stack([img_h, img_w, img_h, img_w]), [1, 4])
    return boxes * scale
174
# Implementation of YOLO post-processing.
# Converts the output of the YOLO encoding (a lot of boxes) into the
# predicted boxes along with their scores, box coordinates and classes.
def yolo_eval(yolo_outputs, image_shape=(720., 1280.), max_boxes=10, score_thresold=.6,
              iou_threshold=.5):
    """Turn the YOLO encoding into filtered, scaled and de-duplicated boxes.

    Arguments:
    yolo_outputs -- output of the encoding model, a sequence of 4 tensors:
                    box_confidence: tensor of shape (None, 19, 19, 5, 1)
                    box_xy: tensor of shape (None, 19, 19, 5, 2)
                    box_wh: tensor of shape (None, 19, 19, 5, 2)
                    box_class_probs: tensor of shape (None, 19, 19, 5, 80)
    image_shape -- (height, width) the boxes are scaled to, as floats;
                   defaults to (720., 1280.)
    max_boxes -- integer, maximum number of predicted boxes you'd like
    score_thresold -- real value; a box is dropped when its highest class
                      score is below it (the parameter name is misspelled
                      but kept so existing keyword callers keep working)
    iou_threshold -- real value, IoU threshold used for NMS filtering

    Returns:
    scores -- tensor of shape (None,), predicted score for each box
    boxes -- tensor of shape (None, 4), predicted box coordinates
    classes -- tensor of shape (None,), predicted class for each box
    """
    # Retrieve the outputs of the YOLO model.
    box_confidence, box_xy, box_wh, box_class_probs = yolo_outputs

    # Convert (centre, size) boxes to corner form, ready for filtering.
    boxes = yolo_boxes_to_corners(box_xy, box_wh)

    # Score filtering with the requested threshold.
    scores, boxes, classes = yolo_filter_boxes(
        box_confidence, boxes, box_class_probs, threshold=score_thresold)

    # Scale the boxes back to the original image size.
    boxes = scale_boxes(boxes, image_shape=image_shape)

    # Non-max suppression; forward the caller's max_boxes instead of a
    # hard-coded 10 so the parameter actually takes effect.
    scores, boxes, classes = yolo_nms(
        scores, boxes, classes, max_boxes=max_boxes, iou_threshold=iou_threshold)

    return scores, boxes, classes
212
# Test the pretrained YOLO model on images from the car-detection dataset.
# Step 1: get the Keras backend session; it is handed to predict() further
# down to run the graph.
sess = K.get_session()
217
# Step 2: load the class names.
def read_classes(classes_path):
    """Read the class names from a text file, one name per line.

    Arguments:
    classes_path -- path of a UTF-8 text file listing one class name per line

    Returns:
    List of the lines of the file, in order, each with leading and trailing
    whitespace removed.
    """
    # The context manager closes the file even if reading fails.
    with open(classes_path, "r", encoding="utf-8") as f:
        class_names = f.readlines()
    # Strip surrounding whitespace (including the trailing newline).
    return [c.strip() for c in class_names]
231
# Class names of the detector, read from the COCO class list; used below for
# the number of classes and for labelling the drawn boxes.
class_names = read_classes("model_data/coco_classes.txt")
# print(class_names)
234
# Step 3: load the anchors.
# Tip: str.split is what cuts the comma-separated line into fields.
def read_anchors(anchor_paths):
    """Read anchor values from the first line of a text file.

    Arguments:
    anchor_paths -- path of a text file whose first line is a comma-separated
                    list of numbers

    Returns:
    numpy array of shape (N, 2) built from the numbers of that first line.

    The raw line, its type, the parsed list, the array shape and the array
    itself are printed along the way.
    """
    with open(anchor_paths) as handle:
        first_line = handle.readline()
    print(first_line)
    print(type(first_line))

    # Parse every comma-separated field as a float.
    values = list(map(float, first_line.split(',')))
    print(values)

    # Group the flat list into rows of two values.
    pairs = np.array(values).reshape(-1, 2)
    print(pairs.shape)
    print(pairs)
    return pairs
251
# Anchor values of the pretrained model, as an (N, 2) array.
anchors = read_anchors('model_data/yolo_anchors.txt')

# (height, width) the predicted boxes are scaled to; kept as floats because
# the boxes are multiplied by these values.
image_shape = (720.,1280.)

# Step 4: load a pretrained model.

# Training a YOLO model takes a very long time and requires a fairly large dataset
# of labelled bounding boxes for a large range of target classes. Instead, an
# existing pretrained Keras YOLO model stored in "yolo.h5" is loaded. (These weights
# come from the official YOLO website, and were converted using a function written
# by Allan Zelener. Technically, these are the parameters from the "YOLOv2" model,
# but it is simply referred to as "YOLO" here.)

yolo_model = load_model("model_data/yolo.h5")
# Show the layer-by-layer summary of the deep CNN.
yolo_model.summary()
print(type(yolo_model))
# This model converts a preprocessed batch of input images
# (shape: (m, 608, 608, 3)) into a tensor of shape (m, 19, 19, 5, 85).

# Step 5: convert the output of the model to usable bounding-box tensors.
# yolo_head converts the final layer features to bounding-box parameters.
yolo_outputs = yolo_head(yolo_model.output, anchors, len(class_names))
print(type(yolo_outputs))

# Step 6: filter the boxes. This builds the graph tensors that predict()
# evaluates for every image.
scores,boxes,classes = yolo_eval(yolo_outputs, image_shape)
282
283 #step7: run the graph on an image
284 # yolo_model.input is given to yolo_model. The model is used to compute the output yolo_model.output
285 # yolo_model.output is processed by yolo_head. It gives you yolo_outputs
286 # yolo_outputs goes through a filtering function, yolo_eval. It outputs your predictions: scores, boxes, classes
287 # implement predict() which runs the graph to test YOLO on an image. You will need to run a
288 # Tensorflow session, to have it compute scores, boxes, classes
289
290 # def predict(sess,image_file):
291 # """
292 # this function could run the graph that store in "sess" to predict
293 # boxes for "image_file"
294 # print and plots the predictions
295
296 # Arguments:
297 # sess: your tensorflow/keras session containing the YOLO graph
298 # image_file: name of an image stored in the "image" folder
299
300 # Returns:
301 # out_scores: tensor of shape(None,), scores of the predicted boxes
302 # out_boxes: tensor of shape(None,4), coordinates of the predicted boxes
303 # out_classes: tensor of shape(None), class index of the predicted boxes
304
305 # Note: "None" actually represents the number of predicted boxes,
306 # it varies between 0 and max_boxes.
307 # """
308 # #Preprocessing your images
309 # image,image_data = preprocess_image("images/"+image_file, model_image_size=(608,608))
310
311 # #Run the session with the correct tensors and choose the correct placeholders in the feed_dict
312 # out_scores,out_boxes,out_classes = sess.run({scores,boxes,classes},feed_dict={
313 # yolo_model.input:image_data,K.learning_phase():0})
314
315 # #Apply different colors to different classes
316 # colors = generate_colors(class_names)
317 # #draw bounding boxes on the image file
318 # draw_boxes(image, out_scores, out_boxes, out_classes, class_names, colors)
319 # #Save the predicted bounding box on the image
320 # image.save(os.path.join("out",image_file),quality=90)
321 # #Display the result
322 # output_image = scipy.misc.imread(os.path.join("out",image_file))
323 # imshow(output_image)
324
325 # return out_scores, out_boxes, out_classes
326
327 # fig,ax = plt.subplot()
328
def predict(sess, image_file):
    """Run the YOLO graph on one image, then print, save and show the result.

    Arguments:
    sess -- the tensorflow/Keras session containing the YOLO graph
    image_file -- name of an image stored in the "images" folder

    Returns:
    out_scores -- scores of the predicted boxes, shape (None,)
    out_boxes -- coordinates of the predicted boxes, shape (None, 4)
    out_classes -- class indices of the predicted boxes, shape (None,)

    Note: "None" actually represents the number of predicted boxes; it varies
    between 0 and max_boxes.

    Side effects: prints the number of boxes found, writes the annotated
    image to the "out" folder (created when missing) and displays it with
    matplotlib.
    """
    # Preprocess the image to the input size used by the model.
    image, image_data = preprocess_image("images/" + image_file, model_image_size=(608, 608))

    # Evaluate the module-level scores/boxes/classes tensors for this image.
    # K.learning_phase() is fed 0 to run the network in inference mode.
    out_scores, out_boxes, out_classes = sess.run(
        [scores, boxes, classes],
        feed_dict={yolo_model.input: image_data, K.learning_phase(): 0})

    # Print predictions info.
    print('Found {} boxes for {}'.format(len(out_boxes), image_file))
    # Generate colors for drawing bounding boxes.
    colors = generate_colors(class_names)
    # Draw bounding boxes on the image.
    draw_boxes(image, out_scores, out_boxes, out_classes, class_names, colors)

    # Save the annotated image; make sure the target folder exists first so
    # a fresh checkout does not fail on the save.
    output_path = os.path.join("out", image_file)
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    image.save(output_path, quality=90)

    # Read the saved file back for display. plt.imread is used because
    # scipy.misc.imread is no longer available in current SciPy releases.
    output_image = plt.imread(output_path)

    imshow(output_image)
    plt.margins(0, 0)
    # Interactive mode plus a short pause refreshes the figure without
    # blocking, so a sequence of images can be shown one after another.
    plt.ion()
    plt.axis('off')
    plt.pause(0.02)

    return out_scores, out_boxes, out_classes
382
383
384
385
# Run the detector on images 0001.jpg ... 0119.jpg.
# File names are zero-padded to four digits; formatting the number handles
# every width uniformly (the previous string concatenation produced
# five-character names such as "00100.jpg" from image 100 onwards).
for i in range(119):
    out_scores, out_boxes, out_classes = predict(sess, "{:04d}.jpg".format(i + 1))
391
392 #Test 1
393 #1.1 test the boolean mask
394 # tensor = [0, 1, 2, 3]
395 # mask = np.array([True, False, True, False])
396 # boolean_mask(tensor, mask) # [0, 2]
397 #1.2 test yolo_filter_box
398 # with tf.Session() as test_a:
399 # box_confidence = tf.random_normal([19,19,5,1],mean=1, stddev= 4,seed=1)
400 # boxes_1 = tf.random_normal([19,19,5,4],mean=1,stddev=4,seed=1)
401 # box_class_probs = tf.random_normal([19,19,5,80],mean=1,stddev=4,seed=1)
402 # scores,boxes,classes = yolo_filter_boxes(box_confidence, boxes_1, box_class_probs,threshold=0.5)
403 # print("scores[2] = " + str(scores[2].eval()))
404 # print("boxes[2] = " + str(boxes[2].eval()))
405 # print("classes[2] = " + str(classes[2].eval()))
406 # print("scores.shape = " + str(scores.shape))
407 # print("boxes.shape = " + str(boxes.shape))
408 # print("classes.shape = " + str(classes.shape))
409 # print(str(scores.eval()))
410 # print(str(box_class_probs[17,18,:,1:5].eval()))
411
412 #Test2 nms
413 #2.1 iou test
414 # box1 = (2,1,4,3)
415 # box2 = (1,2,3,4)
416 # print("iou="+str(iou(box1,box2)))
417 #2.2 nms
418 # with tf.Session() as test_b:
419 # scores = tf.random_normal([54,],mean=1,stddev=4,seed=1)
420 # boxes = tf.random_normal([54,4],mean=1,stddev=4,seed=1)
421 # classes = tf.random_normal([54,],mean=1,stddev=4,seed=1)
422 # scores,boxes,classes = yolo_nms(scores, boxes, classes)
423 # print("scores[2] = " + str(scores[2].eval()))
424 # print("boxes[2] = " + str(boxes[2].eval()))
425 # print("classes[2] = " + str(classes[2].eval()))
426 # print("scores.shape = " + str(scores.eval().shape))
427 # print("boxes.shape = " + str(boxes.eval().shape))
428 # print("classes.shape = " + str(classes.eval().shape))
429
430 #Test3 yolo_eval
431 # with tf.Session() as test_c:
432 # yolo_outputs = (tf.random_normal([19, 19, 5, 1], mean=1, stddev=4, seed = 1),
433 # tf.random_normal([19, 19, 5, 2], mean=1, stddev=4, seed = 1),
434 # tf.random_normal([19, 19, 5, 2], mean=1, stddev=4, seed = 1),
435 # tf.random_normal([19, 19, 5, 80], mean=1, stddev=4, seed = 1))
436 # scores, boxes, classes = yolo_eval(yolo_outputs)
437 # print("scores[2] = " + str(scores[2].eval()))
438 # print("boxes[2] = " + str(boxes[2].eval()))
439 # print("classes[2] = " + str(classes[2].eval()))
440 # print("scores.shape = " + str(scores.eval().shape))
441 # print("boxes.shape = " + str(boxes.eval().shape))
442 # print("classes.shape = " + str(classes.eval().shape))