mono-vo的opencv-python实现

一个关于单目里程计的项目，使用C++的opencv搭环境很复杂，总是调不好，于是把原来的c++代码迁移到了python上。

原项目地址：

https://github.com/avisingh599/mono-vo

作者的讲解博客

http://avisingh599.github.io/vision/monocular-vo/

原项目没有Readme，我来写一个：

传统单目里程计（mono-vo）的原理是利用两帧之间的特征点，通过特征匹配反推出相机运动，相机运动有6自由度（DoF）。通过不停的匹配当前帧与第一帧的特征点，推算出当前帧相对于第一帧的位姿，由此得到一个连续的相机运动。

小细节：

1. 特征点采用的是FAST，具体实现是opencv自带的FastFeatureDetector_create()；特征点追踪则使用calcOpticalFlowPyrLK()；特征点选取要慎重，各种参数需要实践中调节。

2. 本项目通过本征矩阵恢复相机运动，方法是opencv封装好的findEssentialMat()

3. 本征矩阵等一系列对极几何方法都存在尺度模糊问题，本项目引入KITTI自带的标签作为人为监督（场外信息），具体见代码中scale变量的获得方法。

4. 每一次都是计算“当前帧与第一帧”的相机位姿，而不是“当前帧与上一帧”的位姿，因为后者更容易产生漂移（误差）。

5. 相机位姿理论上应该得到6DoF，但是本项目用来画图的只是其中的坐标信息，画出的图可以视作一个俯视图。

6. 焦距（focal length）和主点（priciple point）是相机内参，在执行代码前需要确定。KITTI的相机内参可以观察它提供的calib.txt里的内参矩阵得到。

为了方便对照，变量名和逻辑都没有变动。

下面是一个头文件代码，提供了两个函数，对应原项目的vo_feature.py

import cv2 as cv
import numpy as np


def featureTracking(img_1, img_2, points1):
    # this function automatically gets rid of points for which tracking fails
    lk_params = dict(winSize=(21, 21),
                     maxLevel=3,
                     criteria=(cv.TERM_CRITERIA_EPS | cv.TERM_CRITERIA_COUNT, 30, 0.01),
                     flags=cv.OPTFLOW_LK_GET_MIN_EIGENVALS,
                     minEigThreshold=0.001)
    points2, status, err = cv.calcOpticalFlowPyrLK(img_1, img_2, points1, None, **lk_params)

    # getting rid of points for which the KLT tracking failed or those who have gone outside the frame
    mask = np.all(points2 > 0, axis=1)
    status = np.squeeze(status > 0)
    mask = [mask[i] and status[i] for i in range(len(mask))]

    points1 = points1[mask]
    points2 = points2[mask]

    return points1, points2


def featureDetection(img_1):
    fast_threshold = 100
    nonmaxSuppression = True
    # default: type==cv.FAST_FEATURE_DETECTOR_TYPE_9_1 (value=2)
    # other choices: cv.FAST_FEATURE_DETECTOR_TYPE_5_8 (value=0)/cv.FAST_FEATURE_DETECTOR_TYPE_7_12 (value=1)
    fast = cv.FastFeatureDetector_create(threshold=fast_threshold, nonmaxSuppression=nonmaxSuppression)
    keypoints_1 = fast.detect(img_1, None)
    points1 = cv.KeyPoint_convert(keypoints_1)
    return points1

下面是visodo.py:

from vo_features import featureDetection, featureTracking
import cv2 as cv
import numpy as np
import os


def getMaxFrame(path):
    files_name = os.listdir(path)
    return len(files_name)


def getAbsoluteScale(frame_id):
    try:
        with open("./00.txt", "r") as myfile:#这里00.txt是KITTI中自带的标签信息
            lines = myfile.readlines()
            line = lines[frame_id - 1]
            data = line.split(" ")
            x_prev, y_prev, z_prev = float(data[3]), float(data[7]), float(data[-1])
            line = lines[frame_id]
            data = line.split(" ")
            x, y, z = float(data[3]), float(data[7]), float(data[-1])
        return ((x - x_prev) ** 2 + (y - y_prev) ** 2 + (z - z_prev) ** 2) ** (1 / 2)
    except:
        print("Unable to open file")
        return 0


def main():
    PATH = r'C:/lab/frames/'  #your own dir
    MAX_FRAME = getMaxFrame(PATH)
    MIN_NUM_FEAT = 2000

    files_name = os.listdir(PATH)

    filename1 = PATH + '{}'.format(files_name[0])
    filename2 = PATH + '{}'.format(files_name[1])

    fontFace = cv.FONT_HERSHEY_PLAIN
    fontScale = 1
    thickness = 1
    textOrg = (10, 50)

    img_1_c = cv.imread(filename1)
    img_2_c = cv.imread(filename2)

    img_1 = cv.cvtColor(img_1_c, cv.COLOR_BGR2GRAY)
    img_2 = cv.cvtColor(img_2_c, cv.COLOR_BGR2GRAY)

    points1 = featureDetection(img_1)
    points1, points2 = featureTracking(img_1, img_2, points1)

    focal = 718.8560
    pp = (607.1928, 185.2157)  # principle point

    E, mask = cv.findEssentialMat(points2, points1, focal, pp, cv.RANSAC, 0.999, 1.0)
    _, R, t, _ = cv.recoverPose(E, points2, points1, focal=focal, pp=pp, mask=mask)
    t = np.squeeze(t)
    # print(np.linalg.det(R))

    cv.namedWindow("Road facing camera", cv.WINDOW_AUTOSIZE)  # Create a window for display.
    cv.namedWindow("Trajectory", cv.WINDOW_AUTOSIZE)  # Create a window for display.
    traj = np.ones((800, 800, 3), dtype=np.uint8) * 255

    prevImage = img_2
    prevFeatures = points2
    R_f = R
    t_f = t

    for numFrame in range(2, MAX_FRAME):
        filename = PATH + '{}'.format(files_name[numFrame])
        currImage_c = cv.imread(filename)
        currImage = cv.cvtColor(currImage_c, cv.COLOR_BGR2GRAY)

        prevFeatures, currFeatures = featureTracking(prevImage, currImage, prevFeatures)

        E, mask = cv.findEssentialMat(currFeatures, prevFeatures, focal, pp, cv.RANSAC, 0.999, 1.0)
        _, R, t, _ = cv.recoverPose(E, currFeatures, prevFeatures, focal=focal, pp=pp, mask=mask)
        t = np.squeeze(t)
        scale = getAbsoluteScale(numFrame)
        # print(scale)

        if scale > 0.1 and t[2] > t[0] and t[2] > t[1]:
            t_f = t_f + scale * np.dot(R_f, t)
            R_f = np.dot(R, R_f)

        if len(prevFeatures) < MIN_NUM_FEAT:
            prevFeatures = featureDetection(prevImage)
            prevFeatures, currFeatures = featureTracking(prevImage, currImage, prevFeatures)

        prevImage = currImage
        prevFeatures = currFeatures

        x = int(t_f[0]) + 300
        y = int(t_f[2]) + 100
        # print(x,y)

        traj = cv.circle(traj, (x, y), 1, (255, 0, 0), 2)
        traj = cv.rectangle(traj, (10, 30), (550, 50), (255, 255, 255), cv.FILLED)
        text = "Coordinates: x = {:02f}m y = {:02f}m z = {:02f}m".format(t_f[0], t_f[1], t_f[2])
        cv.putText(traj, text, textOrg, fontFace, fontScale, (255, 255, 255), thickness, 8)

        cv.imshow("Road facing camera", currImage_c)
        cv.imshow("Trajectory", traj)

        cv.waitKey(1)

    cv.imwrite("./track.png", traj)


if __name__ == '__main__':
    main()

posted @ 2022-03-26 10:42 parallelarc 阅读(676) 评论(0) 收藏举报

刷新页面返回顶部

Foxiv

mono-vo的opencv-python实现

公告