mono-vo的opencv-python实现

一个关于单目里程计的项目,使用C++的opencv搭环境很复杂,总是调不好,于是把原来的c++代码迁移到了python上。

原项目地址:

https://github.com/avisingh599/mono-vo

作者的讲解博客

http://avisingh599.github.io/vision/monocular-vo/

原项目没有Readme,我来写一个:

传统 单目里程计(mono-vo)的原理是利用两帧之间的特征点,通过特征匹配反推出相机运动,相机运动有6自由度(DoF)。通过不停的匹配当前帧与第一帧的特征点,推算出当前帧相对于第一帧的位姿,由此得到一个连续的相机运动。

 

小细节:

1. 特征点采用的是FAST,具体实现是opencv自带的FastFeatureDetector_create();特征点追踪则使用calcOpticalFlowPyrLK();特征点选取要慎重,各种参数需要实践中调节。

2. 本项目通过本征矩阵恢复相机运动,方法是opencv封装好的findEssentialMat()

3. 本征矩阵等一系列对极几何方法都存在尺度模糊问题,本项目引入KITTI自带的标签作为人为监督(场外信息),具体见代码中scale变量的获得方法。

4. 每一次都是计算“当前帧与第一帧”的相机位姿,而不是“当前帧与上一帧”的位姿,因为后者更容易产生漂移(误差)。

5. 相机位姿理论上应该得到6DoF,但是本项目用来画图的只是其中的坐标信息,画出的图可以视作一个俯视图。

6. 焦距(focal length)和主点(priciple point)是相机内参,在执行代码前需要确定。KITTI的相机内参可以观察它提供的calib.txt里的内参矩阵得到。

 

为了方便对照,变量名和逻辑都没有变动。

下面是一个头文件代码,提供了两个函数,对应原项目的vo_feature.py

import cv2 as cv
import numpy as np


def featureTracking(img_1, img_2, points1):
    # this function automatically gets rid of points for which tracking fails
    lk_params = dict(winSize=(21, 21),
                     maxLevel=3,
                     criteria=(cv.TERM_CRITERIA_EPS | cv.TERM_CRITERIA_COUNT, 30, 0.01),
                     flags=cv.OPTFLOW_LK_GET_MIN_EIGENVALS,
                     minEigThreshold=0.001)
    points2, status, err = cv.calcOpticalFlowPyrLK(img_1, img_2, points1, None, **lk_params)

    # getting rid of points for which the KLT tracking failed or those who have gone outside the frame
    mask = np.all(points2 > 0, axis=1)
    status = np.squeeze(status > 0)
    mask = [mask[i] and status[i] for i in range(len(mask))]

    points1 = points1[mask]
    points2 = points2[mask]

    return points1, points2


def featureDetection(img_1):
    fast_threshold = 100
    nonmaxSuppression = True
    # default: type==cv.FAST_FEATURE_DETECTOR_TYPE_9_1 (value=2)
    # other choices: cv.FAST_FEATURE_DETECTOR_TYPE_5_8 (value=0)/cv.FAST_FEATURE_DETECTOR_TYPE_7_12 (value=1)
    fast = cv.FastFeatureDetector_create(threshold=fast_threshold, nonmaxSuppression=nonmaxSuppression)
    keypoints_1 = fast.detect(img_1, None)
    points1 = cv.KeyPoint_convert(keypoints_1)
    return points1

下面是visodo.py:

from vo_features import featureDetection, featureTracking
import cv2 as cv
import numpy as np
import os


def getMaxFrame(path):
    files_name = os.listdir(path)
    return len(files_name)


def getAbsoluteScale(frame_id):
    try:
        with open("./00.txt", "r") as myfile:#这里00.txt是KITTI中自带的标签信息
            lines = myfile.readlines()
            line = lines[frame_id - 1]
            data = line.split(" ")
            x_prev, y_prev, z_prev = float(data[3]), float(data[7]), float(data[-1])
            line = lines[frame_id]
            data = line.split(" ")
            x, y, z = float(data[3]), float(data[7]), float(data[-1])
        return ((x - x_prev) ** 2 + (y - y_prev) ** 2 + (z - z_prev) ** 2) ** (1 / 2)
    except:
        print("Unable to open file")
        return 0


def main():
    PATH = r'C:/lab/frames/'  #your own dir
    MAX_FRAME = getMaxFrame(PATH)
    MIN_NUM_FEAT = 2000

    files_name = os.listdir(PATH)

    filename1 = PATH + '{}'.format(files_name[0])
    filename2 = PATH + '{}'.format(files_name[1])

    fontFace = cv.FONT_HERSHEY_PLAIN
    fontScale = 1
    thickness = 1
    textOrg = (10, 50)

    img_1_c = cv.imread(filename1)
    img_2_c = cv.imread(filename2)

    img_1 = cv.cvtColor(img_1_c, cv.COLOR_BGR2GRAY)
    img_2 = cv.cvtColor(img_2_c, cv.COLOR_BGR2GRAY)

    points1 = featureDetection(img_1)
    points1, points2 = featureTracking(img_1, img_2, points1)

    focal = 718.8560
    pp = (607.1928, 185.2157)  # principle point

    E, mask = cv.findEssentialMat(points2, points1, focal, pp, cv.RANSAC, 0.999, 1.0)
    _, R, t, _ = cv.recoverPose(E, points2, points1, focal=focal, pp=pp, mask=mask)
    t = np.squeeze(t)
    # print(np.linalg.det(R))

    cv.namedWindow("Road facing camera", cv.WINDOW_AUTOSIZE)  # Create a window for display.
    cv.namedWindow("Trajectory", cv.WINDOW_AUTOSIZE)  # Create a window for display.
    traj = np.ones((800, 800, 3), dtype=np.uint8) * 255

    prevImage = img_2
    prevFeatures = points2
    R_f = R
    t_f = t

    for numFrame in range(2, MAX_FRAME):
        filename = PATH + '{}'.format(files_name[numFrame])
        currImage_c = cv.imread(filename)
        currImage = cv.cvtColor(currImage_c, cv.COLOR_BGR2GRAY)

        prevFeatures, currFeatures = featureTracking(prevImage, currImage, prevFeatures)

        E, mask = cv.findEssentialMat(currFeatures, prevFeatures, focal, pp, cv.RANSAC, 0.999, 1.0)
        _, R, t, _ = cv.recoverPose(E, currFeatures, prevFeatures, focal=focal, pp=pp, mask=mask)
        t = np.squeeze(t)
        scale = getAbsoluteScale(numFrame)
        # print(scale)

        if scale > 0.1 and t[2] > t[0] and t[2] > t[1]:
            t_f = t_f + scale * np.dot(R_f, t)
            R_f = np.dot(R, R_f)

        if len(prevFeatures) < MIN_NUM_FEAT:
            prevFeatures = featureDetection(prevImage)
            prevFeatures, currFeatures = featureTracking(prevImage, currImage, prevFeatures)

        prevImage = currImage
        prevFeatures = currFeatures

        x = int(t_f[0]) + 300
        y = int(t_f[2]) + 100
        # print(x,y)

        traj = cv.circle(traj, (x, y), 1, (255, 0, 0), 2)
        traj = cv.rectangle(traj, (10, 30), (550, 50), (255, 255, 255), cv.FILLED)
        text = "Coordinates: x = {:02f}m y = {:02f}m z = {:02f}m".format(t_f[0], t_f[1], t_f[2])
        cv.putText(traj, text, textOrg, fontFace, fontScale, (255, 255, 255), thickness, 8)

        cv.imshow("Road facing camera", currImage_c)
        cv.imshow("Trajectory", traj)

        cv.waitKey(1)

    cv.imwrite("./track.png", traj)


if __name__ == '__main__':
    main()

 

posted @ 2022-03-26 10:42  parallelarc  阅读(650)  评论(0)    收藏  举报