The previous post gave a quick introduction to installing and using ArUco; in this one we use ArUco to estimate the camera's pose.

I. Estimating the camera pose with a single marker

Let's start from scratch with a new project:

  • Step 1: create the project directory

Pick a folder you use often, open a terminal in it, and run the following commands:

$ mkdir m_aruco
$ cd m_aruco/
$ mkdir bin include lib src test yml
$ touch CMakeLists.txt README

When that's done, your project directory should look like this:
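
m_aruco
├── bin
├── CMakeLists.txt
├── include
├── lib
├── README
├── src
├── test
└── yml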

  • Step 2: configure the project

Run $ gedit CMakeLists.txt in the terminal and paste in the following:

# ----------------------------------------------------------------------------
#   Basic Configuration
# ----------------------------------------------------------------------------
cmake_minimum_required(VERSION 3.0)
project(m_aruco LANGUAGES CXX)

set(CMAKE_INCLUDE_CURRENT_DIR ON)
set(CMAKE_CXX_STANDARD 11) # C++11...
set(CMAKE_CXX_STANDARD_REQUIRED ON) #...is required...
set(CMAKE_CXX_EXTENSIONS ON) #...with compiler extensions like gnu++11

set( EXECUTABLE_OUTPUT_PATH ${PROJECT_SOURCE_DIR}/bin )
set( LIBRARY_OUTPUT_PATH ${PROJECT_SOURCE_DIR}/lib )

find_package( OpenCV REQUIRED )
# set(aruco_DIR "/home/exbot/OpenCV/aruco306-installed/share/aruco/cmake")
# find_package( aruco REQUIRED )

# If CMake cannot find a package configuration file provided by "aruco",
# set "aruco_INCLUDE_DIRS" and "aruco_LIBS" manually instead:
set(aruco_INCLUDE_DIRS  /home/exbot/OpenCV/aruco306-installed/include/aruco)
set(aruco_LIBS  /home/exbot/OpenCV/aruco306-installed/lib/libaruco.so)

include_directories( include ${OpenCV_INCLUDE_DIRS} ${aruco_INCLUDE_DIRS})

add_executable( cam_pose_estimator test/cam_pose_estimator.cpp )
target_link_libraries( cam_pose_estimator ${OpenCV_LIBS} ${aruco_LIBS} )

  • Step 3: add the source file

cd into the test folder and run:

$ touch cam_pose_estimator.cpp
$ gedit cam_pose_estimator.cpp

Then paste in the following code:

/*****************************************************/
/*                   By Li Xujie                     */
/*****************************************************/

#include "aruco.h"
#include "cvdrawingutils.h"
#include <opencv2/opencv.hpp>
#include <fstream>
#include <iostream>
#include <sstream>
#include <string>
#include <stdexcept>

using namespace std;
using namespace cv;
using namespace aruco;

MarkerDetector MDetector;
VideoCapture TheVideoCapturer;
vector<Marker> TheMarkers;
Mat TheInputImage,TheInputImageGrey, TheInputImageCopy;
CameraParameters TheCameraParameters;

int waitTime = 0;
int ref_id = 0;
bool isVideo=false;
class CmdLineParser
{
    int argc;
    char** argv;

public:
    CmdLineParser(int _argc, char** _argv) : argc(_argc), argv(_argv) {}
    // true if the flag is present on the command line
    bool operator[](string param)
    {
        int idx = -1;
        for (int i = 0; i < argc && idx == -1; i++)
            if (string(argv[i]) == param) idx = i;
        return (idx != -1);
    }
    // value following the flag, or defvalue if the flag is absent
    string operator()(string param, string defvalue = "-1")
    {
        int idx = -1;
        for (int i = 0; i < argc && idx == -1; i++)
            if (string(argv[i]) == param) idx = i;
        if (idx == -1) return defvalue;
        else return (argv[idx + 1]);
    }
};

struct TimerAvrg
{
    std::vector<double> times;
    size_t curr = 0, n;
    std::chrono::high_resolution_clock::time_point begin, end;

    TimerAvrg(int _n = 30) { n = _n; times.reserve(n); }
    inline void start() { begin = std::chrono::high_resolution_clock::now(); }
    inline void stop()
    {
        end = std::chrono::high_resolution_clock::now();
        double duration = double(std::chrono::duration_cast<std::chrono::microseconds>(end - begin).count()) * 1e-6;
        if (times.size() < n) times.push_back(duration);
        else
        {
            times[curr] = duration;
            curr++;
            if (curr >= times.size()) curr = 0;
        }
    }
    double getAvrg()
    {
        double sum = 0;
        for (auto t : times) sum += t;
        return sum / double(times.size());
    }
};

TimerAvrg Fps;
char cam_pose[100];
char cam_vect[100];

void putText(cv::Mat &im,string text,cv::Point p,float size){
    float fact=float(im.cols)/float(640);
    if (fact<1) fact=1;
    cv::putText(im,text,p,FONT_HERSHEY_SIMPLEX, size,cv::Scalar(0,0,0),3*fact);
    cv::putText(im,text,p,FONT_HERSHEY_SIMPLEX, size,cv::Scalar(125,255,255),1*fact);
}

void printInfo(cv::Mat &im){
    float fs=float(im.cols)/float(1200);
    putText(im, "fps="+to_string(1./Fps.getAvrg()),cv::Point(10,fs*30),fs*1.0f);
    putText(im, cam_pose, cv::Point(10,fs*60),fs*1.0f);
    putText(im, cam_vect, cv::Point(10,fs*90),fs*1.0f);
}

int main(int argc, char** argv)
{
    try
    {
        CmdLineParser cml(argc, argv);
        if (argc < 2 || cml["-h"])
        {
            cerr << "Invalid number of arguments" << endl;
            cerr << "Usage: (in.avi|live[:camera_index(e.g. 0 or 1)]) [-c camera_params.yml] [-s marker_size_in_meters] [-d dictionary:"
                    "ALL_DICTS by default] [-ref_id reference_marker's_id for estimating pose of camera] [-e use EnclosedMarker or not] [-h]" << endl;
            return -1;
        }

        ///  PARSE ARGUMENTS
        string TheInputVideo = argv[1];
        ref_id = std::stoi(cml("-ref_id"));
        if (-1 == ref_id) { cout << "You need to specify a reference marker with the [-ref_id] parameter.\n" << endl; return -1; }
	
        // read camera parameters if passed
        float TheMarkerSize = std::stof(cml("-s", "-1"));
		if (cml["-c"]) TheCameraParameters.readFromXMLFile(cml("-c"));
		MDetector.setDictionary(cml("-d", "ALL_DICTS") );  // sets the dictionary to be employed (ARUCO,APRILTAGS,ARTOOLKIT,etc)
		MDetector.getParameters().detectEnclosedMarkers(std::stoi(cml("-e", "0"))); //if use enclosed markers, set -e 1(true), or set -e 0(false). Default value is 0.
		std::map<uint32_t, MarkerPoseTracker> MTracker;  // use a map so that for each id, we use a different pose tracker
		
        ///  OPEN VIDEO
        // read from camera or from  file
        if (TheInputVideo.find("live") != string::npos)
        {
            int vIdx = 0;
            // check if the :idx is here
            char cad[100];
            if (TheInputVideo.find(":") != string::npos)
            {
                std::replace(TheInputVideo.begin(), TheInputVideo.end(), ':', ' ');
                sscanf(TheInputVideo.c_str(), "%s %d", cad, &vIdx);
            }
            cout << "Opening camera index " << vIdx << endl;
            TheVideoCapturer.open(vIdx);
            waitTime = 10;
            isVideo=true;
        }
        else{
            TheVideoCapturer.open(TheInputVideo);
            if (TheVideoCapturer.get(cv::CAP_PROP_FRAME_COUNT) >= 2) isVideo = true;
        }
        // check video is open
        if (!TheVideoCapturer.isOpened()) throw std::runtime_error("Could not open video");
        cv::namedWindow("in", cv::WINDOW_AUTOSIZE);

        /// CONFIGURE DATA
        // read first image to get the dimensions
        TheVideoCapturer >> TheInputImage;
        if (TheCameraParameters.isValid()) TheCameraParameters.resize(TheInputImage.size());

        char key = 0;
        int index = 0,indexSave=0;
        // capture until press ESC or until the end of the video
        do
        {
            TheVideoCapturer.retrieve(TheInputImage);
            TheInputImage.copyTo(TheInputImageCopy);

            Fps.start();
            // TheMarkers = MDetector.detect(TheInputImage, TheCameraParameters, TheMarkerSize);
            TheMarkers = MDetector.detect(TheInputImage);
            for (auto& marker : TheMarkers)  // for each marker
                MTracker[marker.id].estimatePose(marker, TheCameraParameters, TheMarkerSize);  // call its tracker and estimate the pose
            Fps.stop();

            for (unsigned int i = 0; i < TheMarkers.size(); i++)
            {
                if (ref_id == TheMarkers[i].id)
                {
                    Mat camPosMatrix, camVecMatrix;
                    Mat vect = (Mat_<float>(3,1) << 0, 0, 1);
                    // Method 1: invert the R, t pair directly
                    Mat rMatrix, tMatrix;
                    Rodrigues(TheMarkers[i].Rvec, rMatrix);
                    transpose(TheMarkers[i].Tvec, tMatrix);
                    camPosMatrix = rMatrix.inv() * (-tMatrix);
                    camVecMatrix = rMatrix.inv() * vect;
                    cout << "Camera Position: " << camPosMatrix.t() << "\nCamera Direction: " << camVecMatrix.t() << endl;
                    // Method 2: invert the homogeneous 4x4 matrix
                    Mat RTinv = MTracker[ref_id].getRTMatrix().inv();
                    camPosMatrix = RTinv(Rect(3,0,1,3)).clone();
                    camVecMatrix = RTinv(Range(0,3),Range(0,3)) * vect;

                    sprintf(cam_pose, "Camera Position: px=%f, py=%f, pz=%f", camPosMatrix.at<float>(0,0),
                            camPosMatrix.at<float>(1,0), camPosMatrix.at<float>(2,0));
                    sprintf(cam_vect, "Camera Direction: dx=%f, dy=%f, dz=%f", camVecMatrix.at<float>(0,0),
                            camVecMatrix.at<float>(1,0), camVecMatrix.at<float>(2,0));
                    cout << TheMarkers[i].dict_info << " " << TheMarkers[i].id << endl;
                    cout << "Rvec = " << TheMarkers[i].Rvec << endl << "Tvec = " << TheMarkers[i].Tvec << endl;
                    cout << TheMarkers[i][0] << TheMarkers[i][1] << TheMarkers[i][2] << TheMarkers[i][3] << endl << endl;
                    CvDrawingUtils::draw3dAxis(TheInputImageCopy, TheMarkers[i], TheCameraParameters);
                }
                TheMarkers[i].draw(TheInputImageCopy, Scalar(0, 0, 255), 2, true);
                // CvDrawingUtils::draw3dCube(TheInputImageCopy, TheMarkers[i], TheCameraParameters);
            }

            // show input with augmented information and  the thresholded image
            printInfo(TheInputImageCopy);
            cv::imshow("in", TheInputImageCopy);

            key = cv::waitKey(waitTime);  // wait for key to be pressed
            if (key == 's') waitTime = waitTime == 0 ? 10 : 0;
            index++;  // number of images captured

            if (isVideo) if ( TheVideoCapturer.grab()==false) key=27;
        } while (key != 27 );
    }
    catch (std::exception& ex) {cout << "Exception :" << ex.what() << endl; }
}

This program is a trimmed-down edit of the aruco_test.cpp sample; if you're interested, compare the two. I removed a lot of unnecessary material, and even so the code is still a bit long. I also made a small change to the library function Marker::draw(): it merely visualizes the marker's corner points and has no effect on how this program runs, so I mention it here without posting the code.
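
For the curious, the effect of that change is roughly the sketch below: a stand-alone helper with made-up styling, not the actual patch to Marker::draw(), which labels each corner of a detected marker with its index and makes the fixed corner order discussed in Part II visible.

// Illustrative only: label a marker's four corners with their indices.
// (aruco::Marker derives from std::vector<cv::Point2f>, so marker[j]
// is the j-th corner in the fixed order described in Part II.)
void drawCorners(cv::Mat& image, const aruco::Marker& marker)
{
    for (int j = 0; j < 4; j++)
    {
        cv::circle(image, marker[j], 4, cv::Scalar(0, 255, 0), -1);
        cv::putText(image, std::to_string(j), marker[j] + cv::Point2f(5.f, -5.f),
                    cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0, 255, 0), 1);
    }
}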

  • Step 4: build and run

cd into the m_aruco directory and run:

$ mkdir build
$ cd build
$ cmake ..
$ make

If OpenCV and ArUco are installed correctly, the build should complete without errors, producing an executable named "cam_pose_estimator" in the bin directory. Before running it, copy the camera calibration file you generated earlier into the yml folder, get your marker ready, and measure its side length. Once everything is in place, cd into the m_aruco directory and run:

$ ./bin/cam_pose_estimator live:1 -c yml/out_camera_calibration.yml -s 0.135 -d ARUCO -ref_id 4 -e 1

A quick rundown: -c is the camera calibration file, -s is the marker's side length, -d is the marker's dictionary, -ref_id is the id of the reference marker (the estimated camera pose is expressed in that marker's coordinate frame), and -e says that enclosed markers are in use.

The result is shown in the figure above: "Camera Position" is the camera's 3D position in marker 4's coordinate frame, and "Camera Direction" is the camera's viewing direction in that frame. Ideally, when the camera faces marker 4 head-on, Camera Direction = (0, 0, -1). The terminal prints the reference marker's dictionary and id, the rotation and translation vectors from the marker frame to the camera frame, and the pixel coordinates of each marker corner. The camera pose shown in the terminal and the one drawn on the image are computed by two different methods (compare them in the code); the results are identical.

If the estimated camera pose differs noticeably from the camera's real pose, the most likely culprit is an inaccurate calibration. I have to say that the calibration method mentioned in the previous post gives very inaccurate results, so I recommend recalibrating with OpenCV's official sample. (It lives in the OpenCV source tree under samples/cpp as calibration.cpp; I won't go into the details of using it here.)
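
For reference only, under OpenCV 3.x the sample is typically invoked roughly like this; the flags (-w/-h: interior chessboard corners, -s: square size in meters, -o: output file) and the "=" syntax vary between OpenCV versions, so treat this as a sketch and check the usage text the program prints:

$ ./calibration -w=9 -h=6 -s=0.025 -o=out_camera_calibration.yml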

II. How it works

1. Code flow

Personally I find the ArUco source code rather messy, with very inconsistent formatting. Maybe my C++ just isn't good enough, but either way it took me a lot of effort to work out the program's complete flow. Here is a walkthrough of the whole process:

(1) Enter main(), run through a series of configuration steps driven by the arguments you pass in, and initialize a very important object: MarkerDetector MDetector;

(2) Enter the do{ }while(key != 27); loop. The camera feeds in image data, and a very important function executes: TheMarkers = MDetector.detect(TheInputImage, TheCameraParameters, TheMarkerSize);. Let's follow this function to see how markers are detected, identified, and their poses estimated. You may want a good IDE from here on, because we are about to jump all over the ArUco source.

  • <1> Jump to the definition std::vector<aruco::Marker> MarkerDetector::detect(const cv::Mat& input, const CameraParameters& camParams, float markerSizeMeters, bool setYPerperdicular) and note the function's return type and how the arguments are passed along. From it we jump to void MarkerDetector::detect(const cv::Mat& input, std::vector<Marker>& detectedMarkers, CameraParameters camParams, float markerSizeMeters, bool setYPerpendicular), which first checks whether the input image size matches the size stored in the camera parameters; if they differ, the camera parameters are resized to fit the image. They usually match, so detect(input, detectedMarkers, camParams.CameraMatrix, camParams.Distorsion, markerSizeMeters,setYPerpendicular); executes; again, watch how the arguments are passed. Jump to void MarkerDetector::detect(const cv::Mat& input, vector<Marker>& detectedMarkers, Mat camMatrix, Mat distCoeff, float markerSizeMeters, bool setYPerpendicular), and now the show really starts!
  • <2> ConvertGrey: convert the color image to greyscale: cv::cvtColor(input,grey,CV_BGR2GRAY);
  • <3> CreateImageToThreshold: create a Mat object imgToBeThresHolded for the thresholding step. A struct object named "_params" makes its first appearance here; since we passed no detector configuration file, every field of _params keeps its default value. Consequently the condition if ( _params.lowResMarkerSize<minpixsize ) is false, so imgToBeThresHolded=grey;
  • <4> BuildPyramid: build a Gaussian pyramid. Because _params.maxThreads defaults to 1, buildPyramid(imagePyramid,grey,2*getMarkerWarpSize()); runs directly.
  • <5> Threshold and Detect rectangles: threshold the image and run the initial rectangle detection. We enter MarkerCanditates=thresholdAndDetectRectangles(imgToBeThresHolded );; after a flurry of activity it jumps to thresholdAndDetectRectangles_thread ( );, and after another flurry to the overload vector< MarkerDetector::MarkerCandidate> MarkerDetector::thresholdAndDetectRectangles(const cv::Mat & input, int thres_param1,int thres_param2,bool enclosed,cv::Mat &auxThresImage). The image is first binarized by adaptive thresholding cv::adaptiveThreshold(input, auxImage, 255., ADAPTIVE_THRESH_MEAN_C, THRESH_BINARY_INV, static_cast<int>(thres_param1), static_cast<int>(thres_param2));, then contours are extracted cv::findContours(auxThresImage, contours, cv::noArray(), CV_RETR_LIST, CV_CHAIN_APPROX_NONE);, and corners are extracted from them cv::approxPolyDP(contours[i], approxCurve, double(contours[i].size()) * 0.05, true);. Finally each detected quadrilateral is push_back-ed into MarkerCanditates, together with its corners for (int j = 0; j < 4; j++) MarkerCanditates.back().push_back( Point2f(static_cast<float>(approxCurve[j].x),static_cast<float>(approxCurve[j].y))); and its contour MarkerCanditates.back().contour=contours[i];. Afterwards the results are passed back up: in another overload, joinVectors( _vcandidates,joined); returns everything in MarkerCanditates to detect(). (A condensed stand-alone sketch of this stage appears right after this list.)
  • <6> prefilterCandidates: filter MarkerCanditates by entering MarkerCanditates=prefilterCandidates(MarkerCanditates,imgToBeThresHolded.size());. The four corners of each candidate are first re-sorted into clockwise order (the source comment says "anti-clockwise order", which I believe is a mistake), then a few gates are applied (for example, contours that are too large or too small) to discard unsuitable quadrilaterals, and only the valid rectangles are returned: for(size_t i=0;i<MarkerCanditates.size();i++) if (!toRemove[i])finalCandidates.push_back(MarkerCanditates[i]);
  • <7> Marker candidate classification: decide which candidates are real markers by decoding their inner bit pattern. To that end, a perspective warp first produces the canonical (frontal) view of each rectangle: warp( inToWarp, canonicalMarker, Size(markerWarpSize,markerWarpSize),points2d_pyr);. Next, jump to bool DictionaryBased::detect(const cv::Mat& in, int& marker_id, int& nRotations, std::string &additionalInfo), binarize the image with the Otsu algorithm cv::threshold(grey, grey, 125, 255, cv::THRESH_BINARY | cv::THRESH_OTSU);, split the marker into its grid of black and white cells, and read its inner code getInnerCode(grey,nbits,ids);. The code is then looked up in the specified dictionary; on a match if ( dic->is( ids[rot])) we obtain the current marker's id marker_id = dic->at (ids[rot]);, nRotations (used to fix the corner order) nRotations = rot;, and the dictionary name additionalInfo=dic->getName();. If the candidate is not a real marker, or the marker does not belong to the dictionary you specified, the function returns false; if it is recognized, it returns true, the candidate is push_back-ed into detectedMarkers detectedMarkers.push_back(MarkerCanditates[i]);, and the corner order is adjusted std::rotate(detectedMarkers.back().begin(), detectedMarkers.back().begin() + 4 - nRotations, detectedMarkers.back().end());. This step matters: every marker has an orientation, so its four corners have a fixed order; the first corner always lies in the second quadrant of the marker's xOy plane (top left), and the rest follow clockwise. (An illustrative sketch of the bit-sampling idea also follows this list.)
  • <8> Remove Duplicated: discard markers that may have been detected twice: removeElements(detectedMarkers,toRemove);. It is because of this step that, when two markers with the same id from the same dictionary appear in one image, the one that images smaller gets dropped.
  • <9> Corner Refinement: refine the pixel coordinates of the marker corners. Since _params.cornerRefinementM defaults to CORNER_SUBPIX, the function cornerSubPix(grey, Corners, cvSize(halfwsize,halfwsize), cvSize(-1, -1),cvTermCriteria(CV_TERMCRIT_ITER | CV_TERMCRIT_EPS, 12, 0.005)); runs.
  • <10> Pose Estimation: estimate each marker's pose, i.e. solve for Rvec (the rotation vector) and Tvec (the translation vector). Jump to void Marker::calculateExtrinsics(float markerSizeMeters, cv::Mat camMatrix, cv::Mat distCoeff, bool setYPerpendicular): taking the marker's center as the origin of the reference frame, the 3D coordinates of its corners are computed vector<cv::Point3f> objpoints = get3DPoints(markerSizeMeters);, then solvePnP(objpoints, *this,camMatrix, distCoeff,raux,taux); runs. Jump to void solvePnP(const vector<cv::Point3f>& objPoints, const std::vector<cv::Point2f>& imgPoints, cv::InputArray cameraMatrix, cv::InputArray distCoeffs,cv::Mat &rvec,cv::Mat &tvec), which executes solvePoseOfCentredSquare(markerLength, imgPoints, cameraMatrix, distCoeffs, rvec, tvec, reprojErr1, Rvec2, Tvec2, reprojErr2);, a PnP (Perspective-n-Point) solve.
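
To make stage <5> concrete, here is a condensed, stand-alone sketch of the rectangle-candidate search; the thresholding block size and the approximation factor are illustrative, not ArUco's actual _params defaults:

#include <opencv2/opencv.hpp>
#include <vector>

// Sketch of stage <5>: binarize, find contours, keep convex quadrilaterals.
std::vector<std::vector<cv::Point2f>> findRectCandidates(const cv::Mat& grey)
{
    cv::Mat bin;
    cv::adaptiveThreshold(grey, bin, 255, cv::ADAPTIVE_THRESH_MEAN_C,
                          cv::THRESH_BINARY_INV, 7, 7);

    std::vector<std::vector<cv::Point>> contours;
    cv::findContours(bin, contours, cv::RETR_LIST, cv::CHAIN_APPROX_NONE);

    std::vector<std::vector<cv::Point2f>> candidates;
    for (const auto& c : contours)
    {
        std::vector<cv::Point> approx;
        cv::approxPolyDP(c, approx, double(c.size()) * 0.05, true);
        if (approx.size() == 4 && cv::isContourConvex(approx))
        {
            std::vector<cv::Point2f> quad;
            for (const auto& p : approx)
                quad.emplace_back(float(p.x), float(p.y));
            candidates.push_back(quad);
        }
    }
    return candidates;
}

And here is the bit-sampling idea from stage <7>, again as an illustrative sketch rather than ArUco's actual getInnerCode(); the per-cell majority vote is my assumption:

#include <opencv2/opencv.hpp>
#include <cmath>
#include <cstdint>

// Sketch of stage <7>: read the inner code of a canonical (warped, binarized,
// single-channel) marker image by majority-voting the pixels of each grid cell.
uint64_t readInnerCode(const cv::Mat& canonical, int nbits)
{
    int n = int(std::round(std::sqrt(double(nbits))));  // bits per side
    int cell = canonical.cols / n;
    uint64_t code = 0;
    for (int r = 0; r < n; r++)
        for (int c = 0; c < n; c++)
        {
            cv::Rect roi(c * cell, r * cell, cell, cell);
            int white = cv::countNonZero(canonical(roi));
            code = (code << 1) | (white > cell * cell / 2 ? 1u : 0u);
        }
    return code;
}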

(3) My goodness, that function is finally done! Back to main()! We have now detected every marker in the image and obtained each marker's id, corner pixel coordinates, and the all-important Rvec and Tvec. What remains is to compute the camera's pose relative to the reference marker. Rvec and Tvec encode the transform from the marker frame to the camera frame, whereas we want the camera's pose in the marker frame (the transform from the camera frame to the marker frame), hence camPosMatrix = rMatrix.inv()*(-TheMarkers[i].Tvec); and camVecMatrix=rMatrix.inv()*vect; (the math is spelled out just below).
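
In equations, write R for the rotation matrix recovered from Rvec by Rodrigues() and t for Tvec; a point P_m in the marker frame then maps into the camera frame as

P_c = R\,P_m + t

The camera center C is the point whose camera-frame coordinates vanish, and the camera's optical axis is (0,0,1)^T in its own frame, so

C = -R^{-1}t = -R^{\mathsf T}t ,\qquad d = R^{-1}\begin{pmatrix}0\\0\\1\end{pmatrix} = R^{\mathsf T}\begin{pmatrix}0\\0\\1\end{pmatrix}

which is exactly what camPosMatrix and camVecMatrix hold.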

(4) Print and display some information. Nothing worth commenting on here; the code is self-explanatory.

That covers the program's overall workflow. Next let's talk about PnP and see how it recovers Rvec and Tvec from the marker's four corner points.

2. The PnP principle

I may not explain this part well, so let me first point to the Wikipedia article on PnP: Perspective-n-Point. My own understanding is shallow and there is plenty I haven't fully grasped, so please bear with me!

PnP (Perspective-n-Point) is a method for solving 3D-to-2D point-correspondence motion: it describes how to estimate the camera's pose when the coordinates of n 3D points and their 2D projections are known. Think about it: in a single image, knowing the 3D coordinates of as few as three points is enough to estimate the camera's motion and attitude (with at least one extra point to validate the result). The PnP problem, then, is to compute the projection relation between n feature points in the world and their n corresponding image points, and from it recover the pose of the camera or the object. Alright, let's look at the figure:
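
Beyond the picture, a minimal, self-contained sketch in code may help: it hands a marker's four corners to OpenCV's solver. The corner pixel coordinates and intrinsics below are placeholders, not values from this article:

#include <opencv2/opencv.hpp>
#include <iostream>
#include <vector>

int main()
{
    float s = 0.135f;  // marker side length in meters (the -s argument)

    // 3D corners in the marker's own frame: origin at the center, z = 0,
    // first corner top-left, then clockwise (ArUco's fixed corner order).
    std::vector<cv::Point3f> objPoints = {
        {-s / 2,  s / 2, 0}, { s / 2,  s / 2, 0},
        { s / 2, -s / 2, 0}, {-s / 2, -s / 2, 0}};

    // Detected pixel coordinates of the same corners (placeholder values).
    std::vector<cv::Point2f> imgPoints = {
        {300.f, 200.f}, {400.f, 205.f}, {395.f, 305.f}, {295.f, 300.f}};

    // Camera intrinsics and distortion from calibration (placeholder values).
    cv::Mat K = (cv::Mat_<double>(3, 3) << 600, 0, 320, 0, 600, 240, 0, 0, 1);
    cv::Mat dist = cv::Mat::zeros(5, 1, CV_64F);

    // Solve the marker-to-camera transform: Rvec (rotation), Tvec (translation).
    cv::Mat rvec, tvec;
    cv::solvePnP(objPoints, imgPoints, K, dist, rvec, tvec);

    std::cout << "Rvec = " << rvec.t() << "\nTvec = " << tvec.t() << std::endl;
    return 0;
}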

3. Coordinate transforms

Here is a brief description of the coordinate transform between the camera and the marker:
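
In the homogeneous form returned by MarkerPoseTracker::getRTMatrix(), the same transform and its inverse read

\begin{pmatrix} P_c \\ 1 \end{pmatrix} = T_{cm} \begin{pmatrix} P_m \\ 1 \end{pmatrix} ,\qquad T_{cm} = \begin{pmatrix} R & t \\ 0^{\mathsf T} & 1 \end{pmatrix} ,\qquad T_{mc} = T_{cm}^{-1} = \begin{pmatrix} R^{\mathsf T} & -R^{\mathsf T}t \\ 0^{\mathsf T} & 1 \end{pmatrix}

The last column of T_{mc} is the camera position in the marker frame (the RTinv(Rect(3,0,1,3)) block the program extracts), and its upper-left 3x3 block maps the camera's viewing axis into the marker frame, which is why the two methods in the code agree exactly.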


##### References
[1] https://www.zybuluo.com/codeep/note/163962
[2] https://en.wikipedia.org/wiki/Perspective-n-Point
[3] https://blog.csdn.net/ZJU_fish1996/article/details/72312514?locationNum=7&fps=1
[4] https://docs.google.com/document/d/1QU9KoBtjSM2kF6ITOjQ76xqL7H0TEtXriJX5kwi9Kgc/edit (may require a VPN to access)