openpose pytorch代码分析

github: https://github.com/tensorboy/pytorch_Realtime_Multi-Person_Pose_Estimation

  1 # -*- coding: utf-8 -*
  2 import os
  3 import re
  4 import sys
  5 import cv2
  6 import math
  7 import time
  8 import scipy
  9 import argparse
 10 import matplotlib
 11 import numpy as np
 12 import pylab as plt
 13 from joblib import Parallel, delayed
 14 import util
 15 import torch
 16 import torch as T
 17 import torch.nn as nn
 18 import torch.nn.functional as F
 19 from torch.autograd import Variable
 20 from collections import OrderedDict
 21 from config_reader import config_reader
 22 from scipy.ndimage.filters import gaussian_filter
 23 #parser = argparse.ArgumentParser()
 24 #parser.add_argument('--t7_file', required=True)
 25 #parser.add_argument('--pth_file', required=True)
 26 #args = parser.parse_args()
 27 
 28 torch.set_num_threads(torch.get_num_threads())
 29 weight_name = './model/pose_model.pth'
 30 
 31 blocks = {}
 32 # 从1开始算的limb,图对应:Pose Output Format
 33 # find connection in the specified sequence, center 29 is in the position 15
 34 limbSeq = [[2,3], [2,6], [3,4], [4,5], [6,7], [7,8], [2,9], [9,10], \
 35            [10,11], [2,12], [12,13], [13,14], [2,1], [1,15], [15,17], \
 36            [1,16], [16,18], [3,17], [6,18]]
 37            
 38 # the middle joints heatmap correpondence
 39 mapIdx = [[31,32], [39,40], [33,34], [35,36], [41,42], [43,44], [19,20], [21,22], \
 40           [23,24], [25,26], [27,28], [29,30], [47,48], [49,50], [53,54], [51,52], \
 41           [55,56], [37,38], [45,46]]
 42           
 43 # visualize
 44 colors = [[255, 0, 0], [255, 85, 0], [255, 170, 0], [255, 255, 0], [170, 255, 0], [85, 255, 0], [0, 255, 0], \
 45           [0, 255, 85], [0, 255, 170], [0, 255, 255], [0, 170, 255], [0, 85, 255], [0, 0, 255], [85, 0, 255], \
 46           [170, 0, 255], [255, 0, 255], [255, 0, 170], [255, 0, 85]]
 47           
 48 # heatmap channel为19 表示关节点的score
 49 # PAF channel为38 表示limb的单位向量
 50 block0  = [{'conv1_1':[3,64,3,1,1]},{'conv1_2':[64,64,3,1,1]},{'pool1_stage1':[2,2,0]},{'conv2_1':[64,128,3,1,1]},{'conv2_2':[128,128,3,1,1]},{'pool2_stage1':[2,2,0]},{'conv3_1':[128,256,3,1,1]},{'conv3_2':[256,256,3,1,1]},{'conv3_3':[256,256,3,1,1]},{'conv3_4':[256,256,3,1,1]},{'pool3_stage1':[2,2,0]},{'conv4_1':[256,512,3,1,1]},{'conv4_2':[512,512,3,1,1]},{'conv4_3_CPM':[512,256,3,1,1]},{'conv4_4_CPM':[256,128,3,1,1]}]
 51 
 52 blocks['block1_1']  = [{'conv5_1_CPM_L1':[128,128,3,1,1]},{'conv5_2_CPM_L1':[128,128,3,1,1]},{'conv5_3_CPM_L1':[128,128,3,1,1]},{'conv5_4_CPM_L1':[128,512,1,1,0]},{'conv5_5_CPM_L1':[512,38,1,1,0]}]
 53 
 54 blocks['block1_2']  = [{'conv5_1_CPM_L2':[128,128,3,1,1]},{'conv5_2_CPM_L2':[128,128,3,1,1]},{'conv5_3_CPM_L2':[128,128,3,1,1]},{'conv5_4_CPM_L2':[128,512,1,1,0]},{'conv5_5_CPM_L2':[512,19,1,1,0]}]
 55 
 56 for i in range(2,7):
 57     blocks['block%d_1'%i]  = [{'Mconv1_stage%d_L1'%i:[185,128,7,1,3]},{'Mconv2_stage%d_L1'%i:[128,128,7,1,3]},{'Mconv3_stage%d_L1'%i:[128,128,7,1,3]},{'Mconv4_stage%d_L1'%i:[128,128,7,1,3]},
 58 {'Mconv5_stage%d_L1'%i:[128,128,7,1,3]},{'Mconv6_stage%d_L1'%i:[128,128,1,1,0]},{'Mconv7_stage%d_L1'%i:[128,38,1,1,0]}]
 59     blocks['block%d_2'%i]  = [{'Mconv1_stage%d_L2'%i:[185,128,7,1,3]},{'Mconv2_stage%d_L2'%i:[128,128,7,1,3]},{'Mconv3_stage%d_L2'%i:[128,128,7,1,3]},{'Mconv4_stage%d_L2'%i:[128,128,7,1,3]},
 60 {'Mconv5_stage%d_L2'%i:[128,128,7,1,3]},{'Mconv6_stage%d_L2'%i:[128,128,1,1,0]},{'Mconv7_stage%d_L2'%i:[128,19,1,1,0]}]
 61 
 62 def make_layers(cfg_dict):
 63     layers = []
 64     for i in range(len(cfg_dict)-1):
 65         one_ = cfg_dict[i]
 66         for k,v in one_.iteritems():      
 67             if 'pool' in k:
 68                 layers += [nn.MaxPool2d(kernel_size=v[0], stride=v[1], padding=v[2] )]
 69             else:
 70                 conv2d = nn.Conv2d(in_channels=v[0], out_channels=v[1], kernel_size=v[2], stride = v[3], padding=v[4])
 71                 layers += [conv2d, nn.ReLU(inplace=True)]
 72     one_ = cfg_dict[-1].keys()
 73     k = one_[0]
 74     v = cfg_dict[-1][k]
 75     conv2d = nn.Conv2d(in_channels=v[0], out_channels=v[1], kernel_size=v[2], stride = v[3], padding=v[4])
 76     layers += [conv2d]
 77     return nn.Sequential(*layers)
 78     
 79 layers = []
 80 for i in range(len(block0)):
 81     one_ = block0[i]
 82     for k,v in one_.iteritems():      
 83         if 'pool' in k:
 84             layers += [nn.MaxPool2d(kernel_size=v[0], stride=v[1], padding=v[2] )]
 85         else:
 86             conv2d = nn.Conv2d(in_channels=v[0], out_channels=v[1], kernel_size=v[2], stride = v[3], padding=v[4])
 87             layers += [conv2d, nn.ReLU(inplace=True)]  
 88        
 89 models = {}           
 90 models['block0']=nn.Sequential(*layers)        
 91 
 92 for k,v in blocks.iteritems():
 93     models[k] = make_layers(v)
 94                 
 95 class pose_model(nn.Module):
 96     def __init__(self,model_dict,transform_input=False):
 97         super(pose_model, self).__init__()
 98         self.model0   = model_dict['block0']
 99         self.model1_1 = model_dict['block1_1']        
100         self.model2_1 = model_dict['block2_1']  
101         self.model3_1 = model_dict['block3_1']  
102         self.model4_1 = model_dict['block4_1']  
103         self.model5_1 = model_dict['block5_1']  
104         self.model6_1 = model_dict['block6_1']  
105         
106         self.model1_2 = model_dict['block1_2']        
107         self.model2_2 = model_dict['block2_2']  
108         self.model3_2 = model_dict['block3_2']  
109         self.model4_2 = model_dict['block4_2']  
110         self.model5_2 = model_dict['block5_2']  
111         self.model6_2 = model_dict['block6_2']
112         
113     def forward(self, x):    
114         out1 = self.model0(x)
115         
116         out1_1 = self.model1_1(out1)
117         out1_2 = self.model1_2(out1)
118         out2  = torch.cat([out1_1,out1_2,out1],1)
119         
120         out2_1 = self.model2_1(out2)
121         out2_2 = self.model2_2(out2)
122         out3   = torch.cat([out2_1,out2_2,out1],1)
123         
124         out3_1 = self.model3_1(out3)
125         out3_2 = self.model3_2(out3)
126         out4   = torch.cat([out3_1,out3_2,out1],1)
127 
128         out4_1 = self.model4_1(out4)
129         out4_2 = self.model4_2(out4)
130         out5   = torch.cat([out4_1,out4_2,out1],1)  
131         
132         out5_1 = self.model5_1(out5)
133         out5_2 = self.model5_2(out5)
134         out6   = torch.cat([out5_1,out5_2,out1],1)         
135               
136         out6_1 = self.model6_1(out6)
137         out6_2 = self.model6_2(out6)
138         
139         return out6_1,out6_2        
140 
141 
# ---------------------------------------------------------------------------
# Load the trained weights and run the network on one image at several scales.
# Produces, as numpy arrays indexed [row, col, channel]:
#   heatmap_avg - 19 channels, averaged over scales
#   paf_avg     - 38 channels, averaged over scales
# ---------------------------------------------------------------------------
model = pose_model(models)
model.load_state_dict(torch.load(weight_name))
model.cuda()
model.float()
model.eval()

param_, model_ = config_reader()

tic = time.time()
test_image = './sample_image/ski.jpg'
oriImg = cv2.imread(test_image)  # B,G,R order
if oriImg is None:
    # cv2.imread reports failure by returning None instead of raising.
    raise IOError('could not read image: %s' % test_image)

# One resize factor per search scale, relative to the image height.
# float() guards against Python 2 integer division should the configured
# values be ints (config_reader is not visible here - TODO confirm).
multiplier = [x * model_['boxsize'] / float(oriImg.shape[0])
              for x in param_['scale_search']]

# Per-scale results, already resized to the original image resolution.
heatmap_avg = torch.zeros((len(multiplier), 19, oriImg.shape[0], oriImg.shape[1])).cuda()
paf_avg = torch.zeros((len(multiplier), 38, oriImg.shape[0], oriImg.shape[1])).cuda()

toc = time.time()
print('time is %.5f' % (toc - tic))
tic = time.time()

# The target size is the same for every scale, so build the resizer once.
upsample = nn.UpsamplingBilinear2d((oriImg.shape[0], oriImg.shape[1])).cuda()

for m in range(len(multiplier)):
    scale = multiplier[m]

    imageToTest = cv2.resize(oriImg, (0, 0), fx=scale, fy=scale, interpolation=cv2.INTER_CUBIC)
    # NOTE(review): `pad` is never used afterwards - the padded network output
    # is resized straight to the original image size without cropping.
    imageToTest_padded, pad = util.padRightDownCorner(imageToTest, model_['stride'], model_['padValue'])
    # HWC -> 1 x C x H x W, with pixel values mapped to roughly [-0.5, 0.5).
    imageToTest_padded = np.transpose(np.float32(imageToTest_padded[:, :, :, np.newaxis]), (3, 2, 0, 1)) / 256 - 0.5

    feed = Variable(T.from_numpy(imageToTest_padded)).cuda()
    output1, output2 = model(feed)
    print(output1.size())
    print(output2.size())

    # Upsample both outputs to the original image size.
    heatmap = upsample(output2)
    paf = upsample(output1)

    heatmap_avg[m] = heatmap[0].data
    paf_avg[m] = paf[0].data

toc = time.time()
print('time is %.5f' % (toc - tic))
tic = time.time()

# Average over scales, then reorder (C, H, W) -> (H, W, C).
heatmap_avg = T.transpose(T.transpose(T.squeeze(T.mean(heatmap_avg, 0)), 0, 1), 1, 2).cuda()
paf_avg = T.transpose(T.transpose(T.squeeze(T.mean(paf_avg, 0)), 0, 1), 1, 2).cuda()
heatmap_avg = heatmap_avg.cpu().numpy()
paf_avg = paf_avg.cpu().numpy()

toc = time.time()
print('time is %.5f' % (toc - tic))
tic = time.time()
202 
# ---------------------------------------------------------------------------
# Peak detection: find local maxima in each joint heatmap.
# all_peaks[part] is a list of (x, y, score, id) tuples, where x is the column
# index, y the row index, score the unsmoothed heatmap value at the peak, and
# id a counter that is unique across all parts.
# ---------------------------------------------------------------------------
all_peaks = []
peak_counter = 0

for part in range(18):  # first 18 of the 19 heatmap channels
    map_ori = heatmap_avg[:, :, part]
    smoothed = gaussian_filter(map_ori, sigma=3)

    # Copies of the smoothed map shifted by one pixel in each of the four
    # axis directions; the vacated border stays zero.
    prev_row = np.zeros(smoothed.shape)
    prev_row[1:, :] = smoothed[:-1, :]
    next_row = np.zeros(smoothed.shape)
    next_row[:-1, :] = smoothed[1:, :]
    prev_col = np.zeros(smoothed.shape)
    prev_col[:, 1:] = smoothed[:, :-1]
    next_col = np.zeros(smoothed.shape)
    next_col[:, :-1] = smoothed[:, 1:]

    # A peak is >= all four neighbours and above the detection threshold.
    peaks_binary = np.logical_and.reduce((
        smoothed >= prev_row,
        smoothed >= next_row,
        smoothed >= prev_col,
        smoothed >= next_col,
        smoothed > param_['thre1'],
    ))

    # np.nonzero returns (rows, cols); store peaks as (x, y) = (col, row).
    # list() is required on Python 3, where zip() is a one-shot iterator with
    # no len().
    rows, cols = np.nonzero(peaks_binary)
    peaks = list(zip(cols, rows))

    peaks_with_score = [pt + (map_ori[pt[1], pt[0]],) for pt in peaks]
    peaks_with_score_and_id = [
        peak + (peak_counter + offset,)
        for offset, peak in enumerate(peaks_with_score)
    ]

    # Several people may contribute a peak to the same joint map.
    all_peaks.append(peaks_with_score_and_id)
    peak_counter += len(peaks)
234     
235     
236     
237     
# ---------------------------------------------------------------------------
# Limb scoring: for each limb type, score every (joint A, joint B) candidate
# pair by sampling the limb's vector field at mid_num points along the segment
# A -> B, then greedily keep the best non-conflicting pairs.
# connection_all[k] is an (n, 5) array of [A_id, B_id, score, i, j] rows, or
# [] when one of the two joints has no candidates (k is then recorded in
# special_k).
# ---------------------------------------------------------------------------
connection_all = []
special_k = []
mid_num = 10

for k in range(len(mapIdx)):
    # The two PAF channels belonging to limb k.
    score_mid = paf_avg[:, :, [x - 19 for x in mapIdx[k]]]
    candA = all_peaks[limbSeq[k][0] - 1]  # candidates for the limb's first joint
    candB = all_peaks[limbSeq[k][1] - 1]  # candidates for the limb's second joint
    nA = len(candA)
    nB = len(candB)

    if nA != 0 and nB != 0:
        connection_candidate = []
        for i in range(nA):
            for j in range(nB):
                vec = np.subtract(candB[j][:2], candA[i][:2])
                norm = math.sqrt(vec[0] * vec[0] + vec[1] * vec[1])
                if norm == 0:
                    # Both candidates sit on the same pixel: there is no
                    # direction to score, and the distance prior below would
                    # divide by zero.
                    continue
                vec = np.divide(vec, norm)  # unit vector A -> B

                # mid_num evenly spaced (x, y) sample points on the segment.
                # list() keeps this indexable/re-iterable on Python 3.
                startend = list(zip(
                    np.linspace(candA[i][0], candB[j][0], num=mid_num),
                    np.linspace(candA[i][1], candB[j][1], num=mid_num)))

                # Vector-field components at each sample point ([row, col]).
                vec_x = np.array([score_mid[int(round(pt[1])), int(round(pt[0])), 0]
                                  for pt in startend])
                vec_y = np.array([score_mid[int(round(pt[1])), int(round(pt[0])), 1]
                                  for pt in startend])

                # Dot product of the sampled field with the limb direction.
                score_midpts = np.multiply(vec_x, vec[0]) + np.multiply(vec_y, vec[1])
                # Mean alignment, penalised when the limb is longer than half
                # the image height.
                score_with_dist_prior = (sum(score_midpts) / len(score_midpts)
                                         + min(0.5 * oriImg.shape[0] / norm - 1, 0))
                criterion1 = len(np.nonzero(score_midpts > param_['thre2'])[0]) > 0.8 * len(score_midpts)
                criterion2 = score_with_dist_prior > 0
                if criterion1 and criterion2:
                    # (i, j, score, score + both joint scores)
                    connection_candidate.append(
                        [i, j, score_with_dist_prior,
                         score_with_dist_prior + candA[i][2] + candB[j][2]])

        # Best score first; each candidate joint is used at most once.
        connection_candidate = sorted(connection_candidate, key=lambda x: x[2], reverse=True)
        connection = np.zeros((0, 5))
        for c in range(len(connection_candidate)):
            i, j, s = connection_candidate[c][0:3]
            if i not in connection[:, 3] and j not in connection[:, 4]:
                # A_id, B_id, score, i, j
                connection = np.vstack([connection, [candA[i][3], candB[j][3], s, i, j]])
                if len(connection) >= min(nA, nB):
                    break

        connection_all.append(connection)
    else:
        special_k.append(k)
        connection_all.append([])
289 
# ---------------------------------------------------------------------------
# Person assembly: link the per-limb connections into people.
# subset:    one row of 20 values per person. Columns 0..17 hold the candidate
#            id chosen for each joint (-1 = missing); the second last column
#            is the score of the overall configuration; the last column is the
#            total number of parts found for that person.
# candidate: all detected joints flattened into rows of (x, y, score, id);
#            the row position equals the id.
# connection_all: candidate limbs produced by the scoring step.
# ---------------------------------------------------------------------------
subset = -1 * np.ones((0, 20))
candidate = np.array([item for sublist in all_peaks for item in sublist])

for k in range(len(mapIdx)):
    if k in special_k:
        # No connections exist for this limb type.
        continue

    partAs = connection_all[k][:, 0]  # candidate ids of the limb's first joint
    partBs = connection_all[k][:, 1]  # candidate ids of the limb's second joint
    indexA, indexB = np.array(limbSeq[k]) - 1  # 0-based joint columns in subset

    for i in range(len(connection_all[k])):
        # Find the (at most two) people that already own either endpoint.
        found = 0
        subset_idx = [-1, -1]
        for j in range(len(subset)):
            if subset[j][indexA] == partAs[i] or subset[j][indexB] == partBs[i]:
                subset_idx[found] = j
                found += 1

        if found == 1:
            # Attach this limb's missing endpoint to that person.
            j = subset_idx[0]
            if subset[j][indexB] != partBs[i]:
                subset[j][indexB] = partBs[i]
                subset[j][-1] += 1
                subset[j][-2] += candidate[partBs[i].astype(int), 2] + connection_all[k][i][2]
            elif subset[j][indexA] != partAs[i]:
                subset[j][indexA] = partAs[i]
                subset[j][-1] += 1
                subset[j][-2] += candidate[partAs[i].astype(int), 2] + connection_all[k][i][2]

        elif found == 2:  # if found 2 and disjoint, merge them
            j1, j2 = subset_idx
            print("found = 2")
            membership = ((subset[j1] >= 0).astype(int) + (subset[j2] >= 0).astype(int))[:-2]
            if len(np.nonzero(membership == 2)[0]) == 0:
                # No joint column is filled in both rows, so they are two
                # halves of one person. Missing entries are -1, so adding
                # (other + 1) copies the other row's ids into the gaps and
                # leaves existing ids unchanged.
                subset[j1][:-2] += (subset[j2][:-2] + 1)
                subset[j1][-2:] += subset[j2][-2:]
                subset[j1][-2] += connection_all[k][i][2]
                subset = np.delete(subset, j2, 0)
            # TODO: the overlapping case is not handled; it is unclear how
            # the two rows should be merged. Alternative left disabled by the
            # original author:
            # else: # as like found == 1
            #     subset[j1][indexB] = partBs[i]
            #     subset[j1][-1] += 1
            #     subset[j1][-2] += candidate[partBs[i].astype(int), 2] + connection_all[k][i][2]

        # if find no partA in the subset, create a new subset
        elif not found and k < 17:
            row = -1 * np.ones(20)
            row[indexA] = partAs[i]
            row[indexB] = partBs[i]
            row[-1] = 2
            row[-2] = sum(candidate[connection_all[k][i, :2].astype(int), 2]) + connection_all[k][i][2]
            subset = np.vstack([subset, row])

# delete some rows of subset which has few parts occur (fewer than 4 parts,
# or an average score per part below 0.4)
deleteIdx = [
    i for i in range(len(subset))
    if subset[i][-1] < 4 or subset[i][-2] / subset[i][-1] < 0.4
]
subset = np.delete(subset, deleteIdx, axis=0)
357 
# ---------------------------------------------------------------------------
# Visualisation: draw every detected joint as a filled circle, then each
# assembled limb as a semi-transparent ellipse, and save the result.
# ---------------------------------------------------------------------------
canvas = cv2.imread(test_image)  # B,G,R order
for i in range(18):
    for peak in all_peaks[i]:
        # Convert the numpy scalars to plain ints before handing them to
        # OpenCV as a point (defensive; some bindings reject numpy scalars).
        center = (int(peak[0]), int(peak[1]))
        cv2.circle(canvas, center, 4, colors[i], thickness=-1)

stickwidth = 4

for i in range(17):  # only the first 17 of the 19 limbs are drawn
    for n in range(len(subset)):
        index = subset[n][np.array(limbSeq[i]) - 1]  # candidate ids of both endpoints
        if -1 in index:
            # This person is missing one end of the limb.
            continue
        cur_canvas = canvas.copy()
        # Candidate rows are (x, y, score, id): column 0 is x, column 1 is y.
        xs = candidate[index.astype(int), 0]
        ys = candidate[index.astype(int), 1]
        mid_point = (int(np.mean(xs)), int(np.mean(ys)))
        length = ((xs[0] - xs[1]) ** 2 + (ys[0] - ys[1]) ** 2) ** 0.5
        angle = math.degrees(math.atan2(ys[0] - ys[1], xs[0] - xs[1]))
        polygon = cv2.ellipse2Poly(mid_point, (int(length / 2), stickwidth), int(angle), 0, 360, 1)
        cv2.fillConvexPoly(cur_canvas, polygon, colors[i])
        # Blend so that overlapping limbs stay visible.
        canvas = cv2.addWeighted(canvas, 0.4, cur_canvas, 0.6, 0)

toc = time.time()
print('time is %.5f' % (toc - tic))
cv2.imwrite('result.png', canvas)

 

posted @ 2018-05-04 00:03  demianzhang  阅读(6233)  评论(0编辑  收藏  举报