30
#ifndef OPENCV_FLANN_AUTOTUNED_INDEX_H_
31
#define OPENCV_FLANN_AUTOTUNED_INDEX_H_
38
#include "ground_truth.h"
39
#include "index_testing.h"
41
#include "kdtree_index.h"
42
#include "kdtree_single_index.h"
43
#include "kmeans_index.h"
44
#include "composite_index.h"
45
#include "linear_index.h"
51
template<
typename
Distance>
52NNIndex<Distance>* create_index_by_type(
const
Matrix<typename Distance::ElementType>& dataset,
const
IndexParams& params,
const
Distance& distance);
55
struct
AutotunedIndexParams :
public
IndexParams
57
AutotunedIndexParams(
float
target_precision = 0.8,
float
build_weight = 0.01,
float
memory_weight = 0,
float
sample_fraction = 0.1)
59
(*this)[
"algorithm"] = FLANN_INDEX_AUTOTUNED;
61
(*this)[
"target_precision"] = target_precision;
63
(*this)[
"build_weight"] = build_weight;
65
(*this)[
"memory_weight"] = memory_weight;
67
(*this)[
"sample_fraction"] = sample_fraction;
72
template
<
typename
Distance>
73
class
AutotunedIndex :
public
NNIndex<Distance>
76
typedef
typename
Distance::ElementType ElementType;
77
typedef
typename
Distance::ResultType DistanceType;
79
AutotunedIndex(
const
Matrix<ElementType>& inputData,
const
IndexParams& params = AutotunedIndexParams(), Distance d = Distance()) :
80
dataset_(inputData), distance_(d)
82
target_precision_ = get_param(params,
"target_precision",0.8f);
83
build_weight_ = get_param(params,
"build_weight", 0.01f);
84
memory_weight_ = get_param(params,
"memory_weight", 0.0f);
85
sample_fraction_ = get_param(params,
"sample_fraction", 0.1f);
90
AutotunedIndex(
const
AutotunedIndex&);
91
AutotunedIndex& operator=(
const
AutotunedIndex&);
93
virtual
~AutotunedIndex()
95
if
(bestIndex_ != NULL) {
104
virtual
void
buildIndex() CV_OVERRIDE
106
std::ostringstream stream;
107
bestParams_ = estimateBuildParams();
108
print_params(bestParams_, stream);
109
Logger::info(
"----------------------------------------------------\n");
110
Logger::info(
"Autotuned parameters:\n");
111
Logger::info(
"%s", stream.str().c_str());
112
Logger::info(
"----------------------------------------------------\n");
114
bestIndex_ = create_index_by_type(dataset_, bestParams_, distance_);
115
bestIndex_->buildIndex();
116
speedup_ = estimateSearchParams(bestSearchParams_);
117
stream.str(std::string());
118
print_params(bestSearchParams_, stream);
119
Logger::info(
"----------------------------------------------------\n");
120
Logger::info(
"Search parameters:\n");
121
Logger::info(
"%s", stream.str().c_str());
122
Logger::info(
"----------------------------------------------------\n");
128
virtual
void
saveIndex(FILE* stream) CV_OVERRIDE
130
save_value(stream, (
int)bestIndex_->getType());
131
bestIndex_->saveIndex(stream);
132
save_value(stream, get_param<int>(bestSearchParams_,
"checks"));
138
virtual
void
loadIndex(FILE* stream) CV_OVERRIDE
142
load_value(stream, index_type);
144
params[
"algorithm"] = (flann_algorithm_t)index_type;
145
bestIndex_ = create_index_by_type<Distance>(dataset_, params, distance_);
146
bestIndex_->loadIndex(stream);
148
load_value(stream, checks);
149
bestSearchParams_[
"checks"] = checks;
155
virtual
void
findNeighbors(ResultSet<DistanceType>& result,
const
ElementType* vec,
const
SearchParams& searchParams) CV_OVERRIDE
157
int
checks = get_param<int>(searchParams,
"checks",FLANN_CHECKS_AUTOTUNED);
158
if
(checks == FLANN_CHECKS_AUTOTUNED) {
159
bestIndex_->findNeighbors(result, vec, bestSearchParams_);
162
bestIndex_->findNeighbors(result, vec, searchParams);
167
IndexParams getParameters() const CV_OVERRIDE
169
return
bestIndex_->getParameters();
172
SearchParams getSearchParameters()
const
174
return
bestSearchParams_;
177
float
getSpeedup()
const
186
virtual
size_t
size() const CV_OVERRIDE
188
return
bestIndex_->size();
194
virtual
size_t
veclen() const CV_OVERRIDE
196
return
bestIndex_->veclen();
202
virtual
int
usedMemory() const CV_OVERRIDE
204
return
bestIndex_->usedMemory();
210
virtual
flann_algorithm_t getType() const CV_OVERRIDE
212
return
FLANN_INDEX_AUTOTUNED;
219
float
searchTimeCost;
226
void
evaluate_kmeans(CostData& cost)
232
Logger::info(
"KMeansTree using params: max_iterations=%d, branching=%d\n",
233
get_param<int>(cost.params,
"iterations"),
234
get_param<int>(cost.params,
"branching"));
235
KMeansIndex<Distance>
kmeans(sampledDataset_, cost.params, distance_);
240
float
buildTime = (float)t.value;
243
float
searchTime = test_index_precision(
kmeans, sampledDataset_, testDataset_, gt_matches_, target_precision_, checks, distance_, nn);
245
float
datasetMemory = float(sampledDataset_.rows * sampledDataset_.cols *
sizeof(
float));
246
cost.memoryCost = (
kmeans.usedMemory() + datasetMemory) / datasetMemory;
247
cost.searchTimeCost = searchTime;
248
cost.buildTimeCost = buildTime;
249
Logger::info(
"KMeansTree buildTime=%g, searchTime=%g, build_weight=%g\n", buildTime, searchTime, build_weight_);
253
void
evaluate_kdtree(CostData& cost)
259
Logger::info(
"KDTree using params: trees=%d\n", get_param<int>(cost.params,
"trees"));
260
KDTreeIndex<Distance> kdtree(sampledDataset_, cost.params, distance_);
265
float
buildTime = (float)t.value;
268
float
searchTime = test_index_precision(kdtree, sampledDataset_, testDataset_, gt_matches_, target_precision_, checks, distance_, nn);
270
float
datasetMemory = float(sampledDataset_.rows * sampledDataset_.cols *
sizeof(
float));
271
cost.memoryCost = (kdtree.usedMemory() + datasetMemory) / datasetMemory;
272
cost.searchTimeCost = searchTime;
273
cost.buildTimeCost = buildTime;
274
Logger::info(
"KDTree buildTime=%g, searchTime=%g\n", buildTime, searchTime);
326
void
optimizeKMeans(std::vector<CostData>& costs)
328
Logger::info(
"KMEANS, Step 1: Exploring parameter space\n");
331
int
maxIterations[] = { 1, 5, 10, 15 };
332
int
branchingFactors[] = { 16, 32, 64, 128, 256 };
334
int
kmeansParamSpaceSize = FLANN_ARRAY_LEN(maxIterations) * FLANN_ARRAY_LEN(branchingFactors);
335
costs.reserve(costs.size() + kmeansParamSpaceSize);
338
for
(
size_t
i = 0; i < FLANN_ARRAY_LEN(maxIterations); ++i) {
339
for
(
size_t
j = 0; j < FLANN_ARRAY_LEN(branchingFactors); ++j) {
341
cost.params[
"algorithm"] = FLANN_INDEX_KMEANS;
342
cost.params[
"centers_init"] = FLANN_CENTERS_RANDOM;
343
cost.params[
"iterations"] = maxIterations[i];
344
cost.params[
"branching"] = branchingFactors[j];
346
evaluate_kmeans(cost);
347
costs.push_back(cost);
374
void
optimizeKDTree(std::vector<CostData>& costs)
376
Logger::info(
"KD-TREE, Step 1: Exploring parameter space\n");
379
int
testTrees[] = { 1, 4, 8, 16, 32 };
382
for
(
size_t
i = 0; i < FLANN_ARRAY_LEN(testTrees); ++i) {
384
cost.params[
"algorithm"] = FLANN_INDEX_KDTREE;
385
cost.params[
"trees"] = testTrees[i];
387
evaluate_kdtree(cost);
388
costs.push_back(cost);
416
IndexParams estimateBuildParams()
418
std::vector<CostData> costs;
420
int
sampleSize = int(sample_fraction_ * dataset_.rows);
421
int
testSampleSize =
std::min(sampleSize / 10, 1000);
423
Logger::info(
"Entering autotuning, dataset size: %d, sampleSize: %d, testSampleSize: %d, target precision: %g\n", dataset_.rows, sampleSize, testSampleSize, target_precision_);
427
if
(testSampleSize < 10) {
428
Logger::info(
"Choosing linear, dataset too small\n");
429
return
LinearIndexParams();
433
sampledDataset_ = random_sample(dataset_, sampleSize);
435
testDataset_ = random_sample(sampledDataset_, testSampleSize,
true);
438
Logger::info(
"Computing ground truth... \n");
439
gt_matches_ = Matrix<int>(
new
int[testDataset_.rows], testDataset_.rows, 1);
442
compute_ground_truth<Distance>(sampledDataset_, testDataset_, gt_matches_, 0, distance_);
445
CostData linear_cost;
446
linear_cost.searchTimeCost = (float)t.value;
447
linear_cost.buildTimeCost = 0;
448
linear_cost.memoryCost = 0;
449
linear_cost.params[
"algorithm"] = FLANN_INDEX_LINEAR;
451
costs.push_back(linear_cost);
454
Logger::info(
"Autotuning parameters...\n");
456
optimizeKMeans(costs);
457
optimizeKDTree(costs);
459
float
bestTimeCost = costs[0].searchTimeCost;
460
for
(
size_t
i = 0; i < costs.size(); ++i) {
461
float
timeCost = costs[i].buildTimeCost * build_weight_ + costs[i].searchTimeCost;
462
if
(timeCost < bestTimeCost) {
463
bestTimeCost = timeCost;
467
float
bestCost = costs[0].searchTimeCost / bestTimeCost;
468
IndexParams bestParams = costs[0].params;
469
if
(bestTimeCost > 0) {
470
for
(
size_t
i = 0; i < costs.size(); ++i) {
471
float
crtCost = (costs[i].buildTimeCost * build_weight_ + costs[i].searchTimeCost) / bestTimeCost +
472
memory_weight_ * costs[i].memoryCost;
473
if
(crtCost < bestCost) {
475
bestParams = costs[i].params;
480
delete[] gt_matches_.data;
481
delete[] testDataset_.data;
482
delete[] sampledDataset_.data;
494
float
estimateSearchParams(SearchParams& searchParams)
497
const
size_t
SAMPLE_COUNT = 1000;
499
CV_Assert(bestIndex_ != NULL &&
"Requires a valid index");
503
int
samples = (int)
std::min(dataset_.rows / 10, SAMPLE_COUNT);
505
Matrix<ElementType> testDataset = random_sample(dataset_, samples);
507
Logger::info(
"Computing ground truth\n");
510
Matrix<int> gt_matches(
new
int[testDataset.rows], testDataset.rows, 1);
513
compute_ground_truth<Distance>(dataset_, testDataset, gt_matches, 1, distance_);
515
float
linear = (float)t.value;
518
Logger::info(
"Estimating number of checks\n");
522
if
(bestIndex_->getType() == FLANN_INDEX_KMEANS) {
523
Logger::info(
"KMeans algorithm, estimating cluster border factor\n");
524
KMeansIndex<Distance>*
kmeans
= (KMeansIndex<Distance>*)bestIndex_;
525
float
bestSearchTime = -1;
526
float
best_cb_index = -1;
527
int
best_checks = -1;
528
for
(cb_index = 0; cb_index < 1.1f; cb_index += 0.2f) {
529
kmeans->set_cb_index(cb_index);
530
searchTime = test_index_precision(*
kmeans, dataset_, testDataset, gt_matches, target_precision_, checks, distance_, nn, 1);
531
if
((searchTime < bestSearchTime) || (bestSearchTime == -1)) {
532
bestSearchTime = searchTime;
533
best_cb_index = cb_index;
534
best_checks = checks;
537
searchTime = bestSearchTime;
538
cb_index = best_cb_index;
539
checks = best_checks;
541
kmeans->set_cb_index(best_cb_index);
542
Logger::info(
"Optimum cb_index: %g\n", cb_index);
543
bestParams_[
"cb_index"] = cb_index;
546
searchTime = test_index_precision(*bestIndex_, dataset_, testDataset, gt_matches, target_precision_, checks, distance_, nn, 1);
549
Logger::info(
"Required number of checks: %d \n", checks);
550
searchParams[
"checks"] = checks;
552
speedup = linear / searchTime;
554
delete[] gt_matches.data;
555
delete[] testDataset.data;
562
NNIndex<Distance>* bestIndex_;
564
IndexParams bestParams_;
565
SearchParams bestSearchParams_;
567
Matrix<ElementType> sampledDataset_;
568
Matrix<ElementType> testDataset_;
569
Matrix<int> gt_matches_;
576
const
Matrix<ElementType> dataset_;
581
float
target_precision_;
583
float
memory_weight_;
584
float
sample_fraction_;
// --- Extraction residue: IDE/doxygen tooltip text captured with the source, kept as reference notes ---
// CV_EXPORTS_W void min(InputArray src1, InputArray src2, OutputArray dst)
//     Calculates the per-element minimum of two arrays, or of an array and a scalar.
// CV_EXPORTS_W double kmeans(InputArray data, int K, InputOutputArray bestLabels, TermCriteria criteria, int attempts, int flags, OutputArray centers = noArray())
//     Finds the centers of clusters and groups the input samples around the clusters.
// #define CV_Assert(expr)
//     Checks a condition at runtime and throws an exception if it fails.
//     Definition: base.hpp:342