31
#ifndef OPENCV_FLANN_HIERARCHICAL_CLUSTERING_INDEX_H_
32
#define OPENCV_FLANN_HIERARCHICAL_CLUSTERING_INDEX_H_
45
#include "result_set.h"
47
#include "allocator.h"
55
struct
HierarchicalClusteringIndexParams :
public
IndexParams
57
HierarchicalClusteringIndexParams(
int
branching = 32,
58
flann_centers_init_t centers_init = FLANN_CENTERS_RANDOM,
59
int
trees = 4,
int
leaf_size = 100)
61
(*this)[
"algorithm"] = FLANN_INDEX_HIERARCHICAL;
63
(*this)[
"branching"] = branching;
65
(*this)[
"centers_init"] = centers_init;
67
(*this)[
"trees"] = trees;
69
(*this)[
"leaf_size"] = leaf_size;
80
template
<
typename
Distance>
81
class
HierarchicalClusteringIndex :
public
NNIndex<Distance>
84
typedef
typename
Distance::ElementType ElementType;
85
typedef
typename
Distance::ResultType DistanceType;
90
typedef
void (HierarchicalClusteringIndex::* centersAlgFunction)(int,
int*, int,
int*,
int&);
95
centersAlgFunction chooseCenters;
109
void
chooseCentersRandom(
int
k,
int* dsindices,
int
indices_length,
int* centers,
int& centers_length)
111
UniqueRandom r(indices_length);
114
for
(index=0; index<k; ++index) {
115
bool
duplicate =
true;
121
centers_length = index;
125
centers[index] = dsindices[rnd];
127
for
(
int
j=0; j<index; ++j) {
128
DistanceType sq = distance(dataset[centers[index]], dataset[centers[j]], dataset.cols);
136
centers_length = index;
150
void
chooseCentersGonzales(
int
k,
int* dsindices,
int
indices_length,
int* centers,
int& centers_length)
152
int
n = indices_length;
154
int
rnd = rand_int(n);
157
centers[0] = dsindices[rnd];
160
for
(index=1; index<k; ++index) {
163
DistanceType best_val = 0;
164
for
(
int
j=0; j<n; ++j) {
165
DistanceType dist = distance(dataset[centers[0]],dataset[dsindices[j]],dataset.cols);
166
for
(
int
i=1; i<index; ++i) {
167
DistanceType tmp_dist = distance(dataset[centers[i]],dataset[dsindices[j]],dataset.cols);
177
if
(best_index!=-1) {
178
centers[index] = dsindices[best_index];
184
centers_length = index;
201
void
chooseCentersKMeanspp(
int
k,
int* dsindices,
int
indices_length,
int* centers,
int& centers_length)
203
int
n = indices_length;
205
double
currentPot = 0;
206
DistanceType* closestDistSq =
new
DistanceType[n];
209
int
index = rand_int(n);
211
centers[0] = dsindices[index];
215
for
(
int
i = 0; i < n; i++) {
216
closestDistSq[i] = distance(dataset[dsindices[i]], dataset[dsindices[index]], dataset.cols);
217
closestDistSq[i] = ensureSquareDistance<Distance>( closestDistSq[i] );
218
currentPot += closestDistSq[i];
222
const
int
numLocalTries = 1;
226
for
(centerCount = 1; centerCount < k; centerCount++) {
229
double
bestNewPot = -1;
230
int
bestNewIndex = 0;
231
for
(
int
localTrial = 0; localTrial < numLocalTries; localTrial++) {
235
double
randVal = rand_double(currentPot);
236
for
(index = 0; index < n-1; index++) {
237
if
(randVal <= closestDistSq[index])
break;
238
else
randVal -= closestDistSq[index];
243
for
(
int
i = 0; i < n; i++) {
244
DistanceType dist = distance(dataset[dsindices[i]], dataset[dsindices[index]], dataset.cols);
245
newPot +=
std::min( ensureSquareDistance<Distance>(dist), closestDistSq[i] );
249
if
((bestNewPot < 0)||(newPot < bestNewPot)) {
251
bestNewIndex = index;
256
centers[centerCount] = dsindices[bestNewIndex];
257
currentPot = bestNewPot;
258
for
(
int
i = 0; i < n; i++) {
259
DistanceType dist = distance(dataset[dsindices[i]], dataset[dsindices[bestNewIndex]], dataset.cols);
260
closestDistSq[i] =
std::min( ensureSquareDistance<Distance>(dist), closestDistSq[i] );
264
centers_length = centerCount;
266
delete[] closestDistSq;
287
void
GroupWiseCenterChooser(
int
k,
int* dsindices,
int
indices_length,
int* centers,
int& centers_length)
289
const
float
kSpeedUpFactor = 1.3f;
291
int
n = indices_length;
293
DistanceType* closestDistSq =
new
DistanceType[n];
296
int
index = rand_int(n);
298
centers[0] = dsindices[index];
300
for
(
int
i = 0; i < n; i++) {
301
closestDistSq[i] = distance(dataset[dsindices[i]], dataset[dsindices[index]], dataset.cols);
307
for
(centerCount = 1; centerCount < k; centerCount++) {
310
double
bestNewPot = -1;
311
int
bestNewIndex = 0;
312
DistanceType furthest = 0;
313
for
(index = 0; index < n; index++) {
316
if( closestDistSq[index] > kSpeedUpFactor * (
float)furthest ) {
320
for
(
int
i = 0; i < n; i++) {
321
newPot +=
std::min( distance(dataset[dsindices[i]], dataset[dsindices[index]], dataset.cols)
322
, closestDistSq[i] );
326
if
((bestNewPot < 0)||(newPot <= bestNewPot)) {
328
bestNewIndex = index;
329
furthest = closestDistSq[index];
335
centers[centerCount] = dsindices[bestNewIndex];
336
for
(
int
i = 0; i < n; i++) {
337
closestDistSq[i] =
std::min( distance(dataset[dsindices[i]], dataset[dsindices[bestNewIndex]], dataset.cols)
338
, closestDistSq[i] );
342
centers_length = centerCount;
344
delete[] closestDistSq;
358
HierarchicalClusteringIndex(
const
Matrix<ElementType>& inputData,
const
IndexParams& index_params = HierarchicalClusteringIndexParams(),
359
Distance d = Distance())
360
: dataset(inputData), params(index_params), root(NULL), indices(NULL), distance(d)
364
size_ = dataset.rows;
365
veclen_ = dataset.cols;
367
branching_ = get_param(params,
"branching",32);
368
centers_init_ = get_param(params,
"centers_init", FLANN_CENTERS_RANDOM);
369
trees_ = get_param(params,
"trees",4);
370
leaf_size_ = get_param(params,
"leaf_size",100);
372
if
(centers_init_==FLANN_CENTERS_RANDOM) {
373
chooseCenters = &HierarchicalClusteringIndex::chooseCentersRandom;
375
else
if
(centers_init_==FLANN_CENTERS_GONZALES) {
376
chooseCenters = &HierarchicalClusteringIndex::chooseCentersGonzales;
378
else
if
(centers_init_==FLANN_CENTERS_KMEANSPP) {
379
chooseCenters = &HierarchicalClusteringIndex::chooseCentersKMeanspp;
381
else
if
(centers_init_==FLANN_CENTERS_GROUPWISE) {
382
chooseCenters = &HierarchicalClusteringIndex::GroupWiseCenterChooser;
385
FLANN_THROW(cv::Error::StsError,
"Unknown algorithm for choosing initial centers.");
388
root =
new
NodePtr[trees_];
389
indices =
new
int*[trees_];
391
for
(
int
i=0; i<trees_; ++i) {
397
HierarchicalClusteringIndex(
const
HierarchicalClusteringIndex&);
398
HierarchicalClusteringIndex& operator=(
const
HierarchicalClusteringIndex&);
405
virtual
~HierarchicalClusteringIndex()
420
size_t
size() const CV_OVERRIDE
428
size_t
veclen() const CV_OVERRIDE
438
int
usedMemory() const CV_OVERRIDE
440
return
pool.usedMemory+pool.wastedMemory+memoryCounter;
446
void
buildIndex() CV_OVERRIDE
449
FLANN_THROW(cv::Error::StsError,
"Branching factor must be at least 2");
454
for
(
int
i=0; i<trees_; ++i) {
455
indices[i] =
new
int[size_];
456
for
(
size_t
j=0; j<size_; ++j) {
457
indices[i][j] = (int)j;
459
root[i] = pool.allocate<Node>();
460
computeClustering(root[i], indices[i], (
int)size_, branching_,0);
465
flann_algorithm_t getType() const CV_OVERRIDE
467
return
FLANN_INDEX_HIERARCHICAL;
471
void
saveIndex(FILE* stream) CV_OVERRIDE
473
save_value(stream, branching_);
474
save_value(stream, trees_);
475
save_value(stream, centers_init_);
476
save_value(stream, leaf_size_);
477
save_value(stream, memoryCounter);
478
for
(
int
i=0; i<trees_; ++i) {
479
save_value(stream, *indices[i], size_);
480
save_tree(stream, root[i], i);
486
void
loadIndex(FILE* stream) CV_OVERRIDE
497
load_value(stream, branching_);
498
load_value(stream, trees_);
499
load_value(stream, centers_init_);
500
load_value(stream, leaf_size_);
501
load_value(stream, memoryCounter);
503
indices =
new
int*[trees_];
504
root =
new
NodePtr[trees_];
505
for
(
int
i=0; i<trees_; ++i) {
506
indices[i] =
new
int[size_];
507
load_value(stream, *indices[i], size_);
508
load_tree(stream, root[i], i);
511
params[
"algorithm"] = getType();
512
params[
"branching"] = branching_;
513
params[
"trees"] = trees_;
514
params[
"centers_init"] = centers_init_;
515
params[
"leaf_size"] = leaf_size_;
528
void
findNeighbors(ResultSet<DistanceType>& result,
const
ElementType* vec,
const
SearchParams& searchParams) CV_OVERRIDE
531
const
int
maxChecks = get_param(searchParams,
"checks",32);
532
const
bool
explore_all_trees = get_param(searchParams,
"explore_all_trees",
false);
535
Heap<BranchSt>* heap =
new
Heap<BranchSt>((
int)size_);
537
std::vector<bool> checked(size_,
false);
539
for
(
int
i=0; i<trees_; ++i) {
540
findNN(root[i], result, vec, checks, maxChecks, heap, checked, explore_all_trees);
541
if
(!explore_all_trees && (checks >= maxChecks) && result.full())
546
while
(heap->popMin(branch) && (checks<maxChecks || !result.full())) {
547
NodePtr node = branch.node;
548
findNN(node, result, vec, checks, maxChecks, heap, checked,
false);
556
IndexParams getParameters() const CV_OVERRIDE
590
typedef
Node* NodePtr;
597
typedef
BranchStruct<NodePtr, DistanceType> BranchSt;
601
void
save_tree(FILE* stream, NodePtr node,
int
num)
603
save_value(stream, *node);
604
if
(node->childs==NULL) {
605
int
indices_offset = (int)(node->indices - indices[num]);
606
save_value(stream, indices_offset);
609
for(
int
i=0; i<branching_; ++i) {
610
save_tree(stream, node->childs[i], num);
616
void
load_tree(FILE* stream, NodePtr& node,
int
num)
618
node = pool.allocate<Node>();
619
load_value(stream, *node);
620
if
(node->childs==NULL) {
622
load_value(stream, indices_offset);
623
node->indices = indices[num] + indices_offset;
626
node->childs = pool.allocate<NodePtr>(branching_);
627
for(
int
i=0; i<branching_; ++i) {
628
load_tree(stream, node->childs[i], num);
640
for(
int
i=0; i<trees_; ++i) {
641
if
(indices[i]!=NULL) {
650
void
computeLabels(
int* dsindices,
int
indices_length,
int* centers,
int
centers_length,
int* labels, DistanceType& cost)
653
for
(
int
i=0; i<indices_length; ++i) {
654
ElementType* point = dataset[dsindices[i]];
655
DistanceType dist = distance(point, dataset[centers[0]], veclen_);
657
for
(
int
j=1; j<centers_length; ++j) {
658
DistanceType new_dist = distance(point, dataset[centers[j]], veclen_);
679
void
computeClustering(NodePtr node,
int* dsindices,
int
indices_length,
int
branching,
int
level)
681
node->size = indices_length;
684
if
(indices_length < leaf_size_) {
685
node->indices = dsindices;
686
std::sort(node->indices,node->indices+indices_length);
691
std::vector<int> centers(branching);
692
std::vector<int> labels(indices_length);
695
(this->*chooseCenters)(branching, dsindices, indices_length, ¢ers[0], centers_length);
697
if
(centers_length<branching) {
698
node->indices = dsindices;
699
std::sort(node->indices,node->indices+indices_length);
707
computeLabels(dsindices, indices_length, ¢ers[0], centers_length, &labels[0], cost);
709
node->childs = pool.allocate<NodePtr>(branching);
712
for
(
int
i=0; i<branching; ++i) {
713
for
(
int
j=0; j<indices_length; ++j) {
721
node->childs[i] = pool.allocate<Node>();
722
node->childs[i]->pivot = centers[i];
723
node->childs[i]->indices = NULL;
724
computeClustering(node->childs[i],dsindices+start, end-start, branching, level+1);
744
void
findNN(NodePtr node, ResultSet<DistanceType>& result,
const
ElementType* vec,
int& checks,
int
maxChecks,
745
Heap<BranchSt>* heap, std::vector<bool>& checked,
bool
explore_all_trees =
false)
747
if
(node->childs==NULL) {
748
if
(!explore_all_trees && (checks>=maxChecks) && result.full()) {
751
for
(
int
i=0; i<node->size; ++i) {
752
int
index = node->indices[i];
753
if
(!checked[index]) {
754
DistanceType dist = distance(dataset[index], vec, veclen_);
755
result.addPoint(dist, index);
756
checked[index] =
true;
762
DistanceType* domain_distances =
new
DistanceType[branching_];
764
domain_distances[best_index] = distance(vec, dataset[node->childs[best_index]->pivot], veclen_);
765
for
(
int
i=1; i<branching_; ++i) {
766
domain_distances[i] = distance(vec, dataset[node->childs[i]->pivot], veclen_);
767
if
(domain_distances[i]<domain_distances[best_index]) {
771
for
(
int
i=0; i<branching_; ++i) {
773
heap->insert(BranchSt(node->childs[i],domain_distances[i]));
776
delete[] domain_distances;
777
findNN(node->childs[best_index],result,vec, checks, maxChecks, heap, checked, explore_all_trees);
787
const
Matrix<ElementType> dataset;
828
PooledAllocator pool;
838
flann_centers_init_t centers_init_;
CV_EXPORTS_W void sort(InputArray src, OutputArray dst, int flags)
Sorts each row or each column of a matrix.
CV_EXPORTS_W void min(InputArray src1, InputArray src2, OutputArray dst)
Calculates per-element minimum of two arrays or an array and a scalar.
#define CV_Assert(expr)
Checks a condition at runtime and throws exception if it fails
Definition:
base.hpp:342
#define CV_DbgAssert(expr)
Definition:
base.hpp:375
CV_EXPORTS void swap(Mat &a, Mat &b)
Swaps two matrices