OpenCV 5.0.0
Open Source Computer Vision
読み込み中...
検索中...
見つかりません
🤖 AIによる機械翻訳(非公式) — これは OpenCV 5.0.0 公式リファレンス(英語)を AI (Claude) で自動翻訳したものです。訳に誤りを含む場合があります。正確な情報は 公式英語版(原文) を参照してください。
カメラの動き推定

目的

このチュートリアルでは、カメラの運動推定のために再構成APIを使う方法を学ぶ:

  • 追跡された2D点を持つファイルを読み込み、すべてのフレームにわたってコンテナを構築する。
  • libmv再構成パイプラインを実行する。
  • 得られた結果をVizを用いて表示する。

コード

#include <opencv2/core.hpp>
#include <opencv2/sfm.hpp>
#include <opencv2/viz.hpp>
#include <iostream>
#include <fstream>
using namespace std;
using namespace cv;
using namespace cv::sfm;
static void help() {
cout
<< "\n------------------------------------------------------------------\n"
<< " This program shows the camera trajectory reconstruction capabilities\n"
<< " in the OpenCV Structure From Motion (SFM) module.\n"
<< " \n"
<< " Usage:\n"
<< " example_sfm_trajectory_reconstruction <path_to_tracks_file> <f> <cx> <cy>\n"
<< " where: is the tracks file absolute path into your system. \n"
<< " \n"
<< " The file must have the following format: \n"
<< " row1 : x1 y1 x2 y2 ... x36 y36 for track 1\n"
<< " row2 : x1 y1 x2 y2 ... x36 y36 for track 2\n"
<< " etc\n"
<< " \n"
<< " i.e. a row gives the 2D measured position of a point as it is tracked\n"
<< " through frames 1 to 36. If there is no match found in a view then x\n"
<< " and y are -1.\n"
<< " \n"
<< " Each row corresponds to a different point.\n"
<< " \n"
<< " f is the focal length in pixels. \n"
<< " cx is the image principal point x coordinates in pixels. \n"
<< " cy is the image principal point y coordinates in pixels. \n"
<< "------------------------------------------------------------------\n\n"
<< endl;
}
/* Build the following structure data
*
* frame1 frame2 frameN
* track1 | (x11,y11) | -> | (x12,y12) | -> | (x1N,y1N) |
* track2 | (x21,y11) | -> | (x22,y22) | -> | (x2N,y2N) |
* trackN | (xN1,yN1) | -> | (xN2,yN2) | -> | (xNN,yNN) |
*
*
* In case a marker (x,y) does not appear in a frame its
* values will be (-1,-1).
*/
static void parser_2D_tracks(const String &_filename, std::vector<Mat> &points2d )
{
ifstream myfile(_filename.c_str());
if (!myfile.is_open())
{
cout << "Unable to read file: " << _filename << endl;
exit(0);
} else {
double x, y;
string line_str;
int n_frames = 0, n_tracks = 0;
// extract data from text file
vector<vector<Vec2d> > tracks;
for ( ; getline(myfile,line_str); ++n_tracks)
{
istringstream line(line_str);
vector<Vec2d> track;
for ( n_frames = 0; line >> x >> y; ++n_frames)
{
if ( x > 0 && y > 0)
track.push_back(Vec2d(x,y));
else
track.push_back(Vec2d(-1));
}
tracks.push_back(track);
}
// embed data in reconstruction api format
for (int i = 0; i < n_frames; ++i)
{
Mat_<double> frame(2, n_tracks);
for (int j = 0; j < n_tracks; ++j)
{
frame(0,j) = tracks[j][i][0];
frame(1,j) = tracks[j][i][1];
}
points2d.push_back(Mat(frame));
}
myfile.close();
}
}
/* Keyboard callback to control 3D visualization
*/
bool camera_pov = false;
static void keyboard_callback(const viz::KeyboardEvent &event, void* cookie)
{
if ( event.action == 0 &&!event.symbol.compare("s") )
camera_pov = !camera_pov;
}
/* Sample main code
*/
int main(int argc, char** argv)
{
// Read input parameters
if ( argc != 5 )
{
help();
exit(0);
}
// Read 2D points from text file
std::vector<Mat> points2d;
parser_2D_tracks( argv[1], points2d );
// Set the camera calibration matrix
const double f = atof(argv[2]),
cx = atof(argv[3]), cy = atof(argv[4]);
Matx33d K = Matx33d( f, 0, cx,
0, f, cy,
0, 0, 1);
bool is_projective = true;
vector<Mat> Rs_est, ts_est, points3d_estimated;
reconstruct(points2d, Rs_est, ts_est, K, points3d_estimated, is_projective);
// Print output
cout << "\n----------------------------\n" << endl;
cout << "Reconstruction: " << endl;
cout << "============================" << endl;
cout << "Estimated 3D points: " << points3d_estimated.size() << endl;
cout << "Estimated cameras: " << Rs_est.size() << endl;
cout << "Refined intrinsics: " << endl << K << endl << endl;
cout << "3D Visualization: " << endl;
cout << "============================" << endl;
viz::Viz3d window_est("Estimation Coordinate Frame");
window_est.setBackgroundColor(); // black by default
window_est.registerKeyboardCallback(&keyboard_callback);
// Create the pointcloud
cout << "Recovering points ... ";
// recover estimated points3d
vector<Vec3f> point_cloud_est;
for (int i = 0; i < points3d_estimated.size(); ++i)
point_cloud_est.push_back(Vec3f(points3d_estimated[i]));
cout << "[DONE]" << endl;
cout << "Recovering cameras ... ";
vector<Affine3d> path_est;
for (size_t i = 0; i < Rs_est.size(); ++i)
path_est.push_back(Affine3d(Rs_est[i],ts_est[i]));
cout << "[DONE]" << endl;
cout << "Rendering Trajectory ... ";
cout << endl << "Press: " << endl;
cout << " 's' to switch the camera pov" << endl;
cout << " 'q' to close the windows " << endl;
if ( path_est.size() > 0 )
{
// animated trajectory
int idx = 0, forw = -1, n = static_cast<int>(path_est.size());
while(!window_est.wasStopped())
{
for (size_t i = 0; i < point_cloud_est.size(); ++i)
{
Vec3d point = point_cloud_est[i];
Affine3d point_pose(Mat::eye(3,3,CV_64F), point);
char buffer[50];
sprintf (buffer, "%d", static_cast<int>(i));
viz::WCube cube_widget(Point3f(0.1,0.1,0.0), Point3f(0.0,0.0,-0.1), true, viz::Color::blue());
cube_widget.setRenderingProperty(viz::LINE_WIDTH, 2.0);
window_est.showWidget("Cube"+String(buffer), cube_widget, point_pose);
}
Affine3d cam_pose = path_est[idx];
viz::WCameraPosition cpw(0.25); // Coordinate axes
viz::WCameraPosition cpw_frustum(K, 0.3, viz::Color::yellow()); // Camera frustum
if ( camera_pov )
window_est.setViewerPose(cam_pose);
else
{
// render complete trajectory
window_est.showWidget("cameras_frames_and_lines_est", viz::WTrajectory(path_est, viz::WTrajectory::PATH, 1.0, viz::Color::green()));
window_est.showWidget("CPW", cpw, cam_pose);
window_est.showWidget("CPW_FRUSTUM", cpw_frustum, cam_pose);
}
// update trajectory index (spring effect)
forw *= (idx==n || idx==0) ? -1: 1; idx += forw;
// frame rate 1s
window_est.spinOnce(1, true);
window_est.removeAllWidgets();
}
}
return 0;
}
Affine transform.
Definition affine.hpp:127
Template matrix class derived from Mat.
Definition mat.hpp:2581
Comma-separated Matrix Initializer.
Definition mat.hpp:964
Template class for 3D points specified by its coordinates x, y and z.
Definition types.hpp:255
This class represents a keyboard event.
Definition types.hpp:288
String symbol
Definition types.hpp:303
Action action
Definition types.hpp:302
The Viz3d class represents a 3D visualizer window. This class is implicitly shared.
Definition viz3d.hpp:68
This 3D Widget represents camera position in a scene by its axes or viewing frustum....
Definition widgets.hpp:544
This 3D Widget defines a cube.
Definition widgets.hpp:373
Trajectories.
Definition widgets.hpp:605
std::string String
Definition cvstd.hpp:151
#define CV_64F
Definition interface.h:60
void reconstruct(InputArrayOfArrays points2d, OutputArray Ps, OutputArray points3d, InputOutputArray K, bool is_projective=false)
Reconstruct 3d points from 2d correspondences while performing autocalibration.
int main(int argc, char *argv[])
Definition highgui_qt.cpp:3
Definition conditioning.hpp:44
Definition core.hpp:107
STL namespace.

解説

まず、すべてのフレームにわたって追跡された2D点を含むファイルを読み込み、再構成APIに渡すコンテナを構築する必要がある。この場合、追跡された2D点は次のような構造を持つ。2D点配列のベクトルであり、各内側の配列は異なるフレームを表す。各フレームは2D点のリストで構成されており、例えばフレーム1の最初の点はフレーム2の同じ点である。あるフレームに点が存在しない場合、割り当てられる値は (-1,-1) となる:

/* Build the following structure data
*
* frame1 frame2 frameN
* track1 | (x11,y11) | -> | (x12,y12) | -> | (x1N,y1N) |
* track2 | (x21,y11) | -> | (x22,y22) | -> | (x2N,y2N) |
* trackN | (xN1,yN1) | -> | (xN2,yN2) | -> | (xNN,yNN) |
*
*
* In case a marker (x,y) does not appear in a frame its
* values will be (-1,-1).
*/
...
for (int i = 0; i < n_frames; ++i)
{
Mat_<double> frame(2, n_tracks);
for (int j = 0; j < n_tracks; ++j)
{
frame(0,j) = tracks[j][i][0];
frame(1,j) = tracks[j][i][1];
}
points2d.push_back(Mat(frame));
}

次に、構築したコンテナを使って再構成APIに渡す。推定された結果は vector<Mat> に格納しなければならない点に注意することが重要である:

bool is_projective = true;
vector<Mat> Rs_est, ts_est, points3d_estimated;
reconstruct(points2d, Rs_est, ts_est, K, points3d_estimated, is_projective);
// Print output
cout << "\n----------------------------\n" << endl;
cout << "Reconstruction: " << endl;
cout << "============================" << endl;
cout << "Estimated 3D points: " << points3d_estimated.size() << endl;
cout << "Estimated cameras: " << Rs_est.size() << endl;
cout << "Refined intrinsics: " << endl << K << endl << endl;

最後に、得られた結果がVizで表示される。この場合、振動効果を伴ってカメラが再現される。

使い方と結果

このサンプルを実行するには、追跡された点のファイルへのパス、カメラの焦点距離、そして中心投影座標 (ピクセル単位) を指定する必要がある。サンプルファイルは samples/data/desktop_trakcks.txt にある

./example_sfm_trajectory_reconstruction desktop_tracks.txt 1914 640 360

次の図は、追跡された2D点から得られたカメラ運動を示す: