PaddleHub(3)——头部姿态估计

一、PaddleHub头部姿态估计

头部姿态估计是一个典型而复杂的计算机视觉问题，在人脸识别、虹膜识别、视线估计、表情分析等研究领域有着广泛的应用前景，具有重要的学术研究价值。如今人脸识别已经渐渐为人们所熟知，PaddleHub 近期发布了人脸关键点检测模型 face_landmark_localization，该模型转换自 https://github.com/lsy17096535/face-landmark ，可以识别人脸中的68个关键点，为机器理解图片中人物的头部姿态提供了基础。因此，我想基于该模型设计一个简单的头部姿态估计项目。

具体来说，一个物体相对于相机的姿态可以用旋转矩阵和平移矩阵来表示：将图片中检测到的二维人脸关键点与三维人脸模型上的对应点进行匹配，根据二维坐标与三维坐标之间的变换关系求出旋转矩阵和平移矩阵，再由旋转矩阵求解欧拉角，得到头部姿态参数。

平移矩阵：物体相对于相机的空间位置关系矩阵，用T表示；

旋转矩阵：物体相对于相机的空间姿态关系矩阵，用R表示；

坐标系转换：

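世界坐标系到相机坐标系：

$$\begin{bmatrix} X_c \\ Y_c \\ Z_c \end{bmatrix} = R \begin{bmatrix} X_w \\ Y_w \\ Z_w \end{bmatrix} + T$$

相机坐标系到像素坐标系（$f$ 为以像素为单位的焦距，$(c_x, c_y)$ 为图像中心的像素坐标）：

$$Z_c \begin{bmatrix} u \\ v \\ 1 \end{bmatrix} = \begin{bmatrix} f & 0 & c_x \\ 0 & f & c_y \\ 0 & 0 & 1 \end{bmatrix} \begin{bmatrix} X_c \\ Y_c \\ Z_c \end{bmatrix}$$

图像中心坐标系到像素坐标系（$(x, y)$ 为以图像中心为原点的坐标）：

$$u = x + c_x, \qquad v = y + c_y$$

（上式的求解可用DLT(Direct Linear Transform)算法结合最小二乘进行迭代求解）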

得到旋转矩阵 R 后，就可以由它求解欧拉角（俯仰角 pitch、偏航角 yaw、翻滚角 roll）。

二、主要代码

1、载入模块，加入所引用的目录

import cv2
import numpy as np
import paddlehub as hub

2、加载头部关键点坐标，头部投影点坐标，显示投影点连线坐标

 1 # 头部三维关键点坐标
 2 self.model_points = np.array([
 3     [6.825897, 6.760612, 4.402142],
 4     [1.330353, 7.122144, 6.903745],
 5     [-1.330353, 7.122144, 6.903745],
 6     [-6.825897, 6.760612, 4.402142],
 7     [5.311432, 5.485328, 3.987654],
 8     [1.789930, 5.393625, 4.413414],
 9     [-1.789930, 5.393625, 4.413414],
10     [-5.311432, 5.485328, 3.987654],
11     [2.005628, 1.409845, 6.165652],
12     [-2.005628, 1.409845, 6.165652],
13     [2.774015, -2.080775, 5.048531],
14     [-2.774015, -2.080775, 5.048531],
15     [0.000000, -3.116408, 6.097667],
16     [0.000000, -7.415691, 4.070434]
17 ], dtype='float')
18 # 头部投影点
19 self.reprojectsrc = np.float32([
20     [10.0, 10.0, 10.0],
21     [10.0, -10.0, 10.0],
22     [-10.0, 10.0, 10.0],
23     [-10.0, -10.0, 10.0]])
24 # 投影点连线
25 self.line_pairs = [
26     [0, 2], [1, 3], [0, 1], [2, 3]]

3、从face_landmark_localization的检测结果抽取姿态估计需要的点坐标

 1  #face_landmark_localization的检测结果抽取姿态估计需要的点坐标
 2    
 3         image_points = np.array([
 4             face_landmark[17], face_landmark[21], 
 5             face_landmark[22], face_landmark[26], 
 6             face_landmark[36], face_landmark[39], 
 7             face_landmark[42], face_landmark[45], 
 8             face_landmark[31], face_landmark[35],
 9             face_landmark[48], face_landmark[54],
10             face_landmark[57], face_landmark[8],
11             face_landmark[14], face_landmark[2], 
12             face_landmark[32], face_landmark[33],
13             face_landmark[34], 
14             ], dtype='float')
15         return image_points
16     
17     def caculate_pose_vector(self, image_points):

4、获取旋转向量和平移向量

 1 def get_pose_vector(self, image_points):
 2       center = (self.img_size[1] / 2, self.img_size[0] / 2)
 3       focal_length = self.img_size[1]
 4       camera_matrix = np.array([
 5           [focal_length, 0, center[0]],
 6           [0, focal_length, center[1]],
 7           [0, 0, 1]],
 8           dtype="float")
 9       dist_coeffs = np.zeros((4, 1))
10       ret, rotation_vector, translation_vector = cv2.solvePnP(self.model_points,image_points,camera_matrix,dist_coeffs)
11       reprojectdst, ret = cv2.projectPoints(self.reprojectsrc, rotation_vector, translation_vector, camera_matrix,dist_coeffs)
12       return rotation_vector, translation_vector, camera_matrix, dist_coeffs, reprojectdst

5、求欧拉角

1    rvec_matrix = cv2.Rodrigues(rotation_vector)[0]
2         proj_matrix = np.hstack((rvec_matrix, translation_vector))
3         euler_angles = cv2.decomposeProjectionMatrix(proj_matrix)[6]
4         pitch, yaw, roll = [math.radians(_) for _ in euler_angles]
5         return  euler_angles

6、在图中投影出结果

 1 # 画出投影正方体
 2             alpha=0.3
 3             if not hasattr(self, 'before'):
 4                 self.before = reprojectdst
 5             else:
 6                 reprojectdst = alpha * self.before + (1-alpha)* reprojectdst
 7             reprojectdst = tuple(map(tuple, reprojectdst.reshape(8, 2)))
 8             for start, end in self.line_pairs:
 9                 cv2.line(img, reprojectdst[start], reprojectdst[end], (0, 0, 255))
10 
11  # 计算头部欧拉角
12             pitch, yaw, roll = self.caculate_euler_angle(rotation_vector, translation_vector)
13             cv2.putText(img, "pitch: " + "{:7.2f}".format(pitch), (20, int(self.img_size[0]/2 -10)), cv2.FONT_HERSHEY_SIMPLEX,
14                         0.75, (0, 0, 255), thickness=2)
15             cv2.putText(img, "yaw: " + "{:7.2f}".format(yaw), (20, int(self.img_size[0]/2 + 30) ), cv2.FONT_HERSHEY_SIMPLEX,
16                         0.75, (0, 0, 255), thickness=2)
17             cv2.putText(img, "roll: " + "{:7.2f}".format(roll), (20, int(self.img_size[0]/2 +70)), cv2.FONT_HERSHEY_SIMPLEX,
18                         0.75, (0, 0, 255), thickness=2)
19             for index, action in enumerate(index_action):
20                 cv2.putText(img, "{}".format(self._index_action[action]), index_action[action][1], 
21                         cv2.FONT_HERSHEY_SIMPLEX, 0.75, (50, 50, 50), thickness=2)
22             frames_euler.append([index, img, pitch, yaw, roll])