31
#ifndef OPENCV_FLANN_KMEANS_INDEX_H_
32
#define OPENCV_FLANN_KMEANS_INDEX_H_
45
#include "result_set.h"
47
#include "allocator.h"
52
#define BITS_PER_CHAR 8
53
#define BITS_PER_BASE 2
54
#define BASE_PER_CHAR (BITS_PER_CHAR/BITS_PER_BASE)
55
#define HISTOS_PER_BASE (1<<BITS_PER_BASE)
61
struct
KMeansIndexParams :
public
IndexParams
63
KMeansIndexParams(
int
branching = 32,
int
iterations = 11,
64
flann_centers_init_t centers_init = FLANN_CENTERS_RANDOM,
65
float
cb_index = 0.2,
int
trees = 1 )
67
(*this)[
"algorithm"] = FLANN_INDEX_KMEANS;
69
(*this)[
"branching"] = branching;
71
(*this)[
"iterations"] = iterations;
73
(*this)[
"centers_init"] = centers_init;
75
(*this)[
"cb_index"] = cb_index;
77
(*this)[
"trees"] = trees;
88
template
<
typename
Distance>
89
class
KMeansIndex :
public
NNIndex<Distance>
92
typedef
typename
Distance::ElementType ElementType;
93
typedef
typename
Distance::ResultType DistanceType;
94
typedef
typename
Distance::CentersType CentersType;
96
typedef
typename
Distance::is_kdtree_distance is_kdtree_distance;
97
typedef
typename
Distance::is_vector_space_distance is_vector_space_distance;
101
typedef
void (KMeansIndex::* centersAlgFunction)(int,
int*, int,
int*,
int&);
106
centersAlgFunction chooseCenters;
120
void
chooseCentersRandom(
int
k,
int* indices,
int
indices_length,
int* centers,
int& centers_length)
122
UniqueRandom r(indices_length);
125
for
(index=0; index<k; ++index) {
126
bool
duplicate =
true;
132
centers_length = index;
136
centers[index] = indices[rnd];
138
for
(
int
j=0; j<index; ++j) {
139
DistanceType sq = distance_(dataset_[centers[index]], dataset_[centers[j]], dataset_.cols);
147
centers_length = index;
161
void
chooseCentersGonzales(
int
k,
int* indices,
int
indices_length,
int* centers,
int& centers_length)
163
int
n = indices_length;
165
int
rnd = rand_int(n);
168
centers[0] = indices[rnd];
171
for
(index=1; index<k; ++index) {
174
DistanceType best_val = 0;
175
for
(
int
j=0; j<n; ++j) {
176
DistanceType dist = distance_(dataset_[centers[0]],dataset_[indices[j]],dataset_.cols);
177
for
(
int
i=1; i<index; ++i) {
178
DistanceType tmp_dist = distance_(dataset_[centers[i]],dataset_[indices[j]],dataset_.cols);
188
if
(best_index!=-1) {
189
centers[index] = indices[best_index];
195
centers_length = index;
212
void
chooseCentersKMeanspp(
int
k,
int* indices,
int
indices_length,
int* centers,
int& centers_length)
214
int
n = indices_length;
216
double
currentPot = 0;
217
DistanceType* closestDistSq =
new
DistanceType[n];
220
int
index = rand_int(n);
222
centers[0] = indices[index];
224
for
(
int
i = 0; i < n; i++) {
225
closestDistSq[i] = distance_(dataset_[indices[i]], dataset_[indices[index]], dataset_.cols);
226
closestDistSq[i] = ensureSquareDistance<Distance>( closestDistSq[i] );
227
currentPot += closestDistSq[i];
231
const
int
numLocalTries = 1;
235
for
(centerCount = 1; centerCount < k; centerCount++) {
238
double
bestNewPot = -1;
239
int
bestNewIndex = -1;
240
for
(
int
localTrial = 0; localTrial < numLocalTries; localTrial++) {
244
double
randVal = rand_double(currentPot);
245
for
(index = 0; index < n-1; index++) {
246
if
(randVal <= closestDistSq[index])
break;
247
else
randVal -= closestDistSq[index];
252
for
(
int
i = 0; i < n; i++) {
253
DistanceType dist = distance_(dataset_[indices[i]], dataset_[indices[index]], dataset_.cols);
254
newPot +=
std::min( ensureSquareDistance<Distance>(dist), closestDistSq[i] );
258
if
((bestNewPot < 0)||(newPot < bestNewPot)) {
260
bestNewIndex = index;
265
centers[centerCount] = indices[bestNewIndex];
266
currentPot = bestNewPot;
267
for
(
int
i = 0; i < n; i++) {
268
DistanceType dist = distance_(dataset_[indices[i]], dataset_[indices[bestNewIndex]], dataset_.cols);
269
closestDistSq[i] =
std::min( ensureSquareDistance<Distance>(dist), closestDistSq[i] );
273
centers_length = centerCount;
275
delete[] closestDistSq;
282
flann_algorithm_t getType() const CV_OVERRIDE
284
return
FLANN_INDEX_KMEANS;
287
template<
class
CentersContainerType>
291
KMeansDistanceComputer(Distance _distance,
const
Matrix<ElementType>& _dataset,
292
const
int
_branching,
const
int* _indices,
const
CentersContainerType& _dcenters,
293
const
size_t
_veclen, std::vector<int> &_new_centroids,
294
std::vector<DistanceType> &_sq_dists)
295
: distance(_distance)
297
, branching(_branching)
299
, dcenters(_dcenters)
301
, new_centroids(_new_centroids)
302
, sq_dists(_sq_dists)
306
void
operator()(
const
cv::Range& range)
const
CV_OVERRIDE
308
const
int
begin = range.start;
309
const
int
end = range.end;
311
for(
int
i = begin; i<end; ++i)
313
DistanceType sq_dist(distance(dataset[indices[i]], dcenters[0], veclen));
315
for
(
int
j=1; j<branching; ++j) {
316
DistanceType new_sq_dist = distance(dataset[indices[i]], dcenters[j], veclen);
317
if
(sq_dist>new_sq_dist) {
319
sq_dist = new_sq_dist;
322
sq_dists[i] = sq_dist;
323
new_centroids[i] = new_centroid;
329
const
Matrix<ElementType>& dataset;
332
const
CentersContainerType& dcenters;
334
std::vector<int> &new_centroids;
335
std::vector<DistanceType> &sq_dists;
336
KMeansDistanceComputer& operator=(
const
KMeansDistanceComputer & ) {
return
*
this; }
346
KMeansIndex(
const
Matrix<ElementType>& inputData,
const
IndexParams& params = KMeansIndexParams(),
347
Distance d = Distance())
348
: dataset_(inputData), index_params_(params), root_(NULL), indices_(NULL), distance_(d)
352
size_ = dataset_.rows;
353
veclen_ = dataset_.cols;
355
branching_ = get_param(params,
"branching",32);
356
trees_ = get_param(params,
"trees",1);
357
iterations_ = get_param(params,
"iterations",11);
361
centers_init_ = get_param(params,
"centers_init",FLANN_CENTERS_RANDOM);
363
if
(centers_init_==FLANN_CENTERS_RANDOM) {
364
chooseCenters = &KMeansIndex::chooseCentersRandom;
366
else
if
(centers_init_==FLANN_CENTERS_GONZALES) {
367
chooseCenters = &KMeansIndex::chooseCentersGonzales;
369
else
if
(centers_init_==FLANN_CENTERS_KMEANSPP) {
370
chooseCenters = &KMeansIndex::chooseCentersKMeanspp;
373
FLANN_THROW(cv::Error::StsBadArg,
"Unknown algorithm for choosing initial centers.");
377
root_ =
new
KMeansNodePtr[trees_];
378
indices_ =
new
int*[trees_];
380
for
(
int
i=0; i<trees_; ++i) {
387
KMeansIndex(
const
KMeansIndex&);
388
KMeansIndex& operator=(
const
KMeansIndex&);
396
virtual
~KMeansIndex()
402
if
(indices_!=NULL) {
411
size_t
size() const CV_OVERRIDE
419
size_t
veclen() const CV_OVERRIDE
425
void
set_cb_index(
float
index)
434
int
usedMemory() const CV_OVERRIDE
436
return
pool_.usedMemory+pool_.wastedMemory+memoryCounter_;
442
void
buildIndex() CV_OVERRIDE
445
FLANN_THROW(cv::Error::StsError,
"Branching factor must be at least 2");
450
for
(
int
i=0; i<trees_; ++i) {
451
indices_[i] =
new
int[size_];
452
for
(
size_t
j=0; j<size_; ++j) {
453
indices_[i][j] = int(j);
455
root_[i] = pool_.allocate<KMeansNode>();
456
std::memset(root_[i], 0,
sizeof(KMeansNode));
458
Distance* dummy = NULL;
459
computeNodeStatistics(root_[i], indices_[i], (
unsigned
int)size_, dummy);
461
computeClustering(root_[i], indices_[i], (
int)size_, branching_,0);
466
void
saveIndex(FILE* stream) CV_OVERRIDE
468
save_value(stream, branching_);
469
save_value(stream, iterations_);
470
save_value(stream, memoryCounter_);
471
save_value(stream, cb_index_);
472
save_value(stream, trees_);
473
for
(
int
i=0; i<trees_; ++i) {
474
save_value(stream, *indices_[i], (
int)size_);
475
save_tree(stream, root_[i], i);
480
void
loadIndex(FILE* stream) CV_OVERRIDE
482
if
(indices_!=NULL) {
490
load_value(stream, branching_);
491
load_value(stream, iterations_);
492
load_value(stream, memoryCounter_);
493
load_value(stream, cb_index_);
494
load_value(stream, trees_);
496
indices_ =
new
int*[trees_];
497
for
(
int
i=0; i<trees_; ++i) {
498
indices_[i] =
new
int[size_];
499
load_value(stream, *indices_[i], size_);
500
load_tree(stream, root_[i], i);
503
index_params_[
"algorithm"] = getType();
504
index_params_[
"branching"] = branching_;
505
index_params_[
"trees"] = trees_;
506
index_params_[
"iterations"] = iterations_;
507
index_params_[
"centers_init"] = centers_init_;
508
index_params_[
"cb_index"] = cb_index_;
521
void
findNeighbors(ResultSet<DistanceType>& result,
const
ElementType* vec,
const
SearchParams& searchParams) CV_OVERRIDE
524
const
int
maxChecks = get_param(searchParams,
"checks",32);
526
if
(maxChecks==FLANN_CHECKS_UNLIMITED) {
527
findExactNN(root_[0], result, vec);
531
Heap<BranchSt>* heap =
new
Heap<BranchSt>((
int)size_);
534
for
(
int
i=0; i<trees_; ++i) {
535
findNN(root_[i], result, vec, checks, maxChecks, heap);
536
if
((checks >= maxChecks) && result.full())
541
while
(heap->popMin(branch) && (checks<maxChecks || !result.full())) {
542
KMeansNodePtr node = branch.node;
543
findNN(node, result, vec, checks, maxChecks, heap);
558
int
getClusterCenters(Matrix<CentersType>& centers)
560
int
numClusters = centers.rows;
562
FLANN_THROW(cv::Error::StsBadArg,
"Number of clusters must be at least 1");
565
DistanceType variance;
566
KMeansNodePtr* clusters =
new
KMeansNodePtr[numClusters];
568
int
clusterCount = getMinVarianceClusters(root_[0], clusters, numClusters, variance);
570
Logger::info(
"Clusters requested: %d, returning %d\n",numClusters, clusterCount);
572
for
(
int
i=0; i<clusterCount; ++i) {
573
CentersType* center = clusters[i]->pivot;
574
for
(
size_t
j=0; j<veclen_; ++j) {
575
centers[i][j] = center[j];
583
IndexParams getParameters() const CV_OVERRIDE
585
return
index_params_;
606
DistanceType mean_radius;
610
DistanceType variance;
628
typedef
KMeansNode* KMeansNodePtr;
633
typedef
BranchStruct<KMeansNodePtr, DistanceType> BranchSt;
638
void
save_tree(FILE* stream, KMeansNodePtr node,
int
num)
640
save_value(stream, *node);
641
save_value(stream, *(node->pivot), (
int)veclen_);
642
if
(node->childs==NULL) {
643
int
indices_offset = (int)(node->indices - indices_[num]);
644
save_value(stream, indices_offset);
647
for(
int
i=0; i<branching_; ++i) {
648
save_tree(stream, node->childs[i], num);
654
void
load_tree(FILE* stream, KMeansNodePtr& node,
int
num)
656
node = pool_.allocate<KMeansNode>();
657
load_value(stream, *node);
658
node->pivot =
new
CentersType[veclen_];
659
load_value(stream, *(node->pivot), (
int)veclen_);
660
if
(node->childs==NULL) {
662
load_value(stream, indices_offset);
663
node->indices = indices_[num] + indices_offset;
666
node->childs = pool_.allocate<KMeansNodePtr>(branching_);
667
for(
int
i=0; i<branching_; ++i) {
668
load_tree(stream, node->childs[i], num);
677
void
free_centers(KMeansNodePtr node)
679
delete[] node->pivot;
680
if
(node->childs!=NULL) {
681
for
(
int
k=0; k<branching_; ++k) {
682
free_centers(node->childs[k]);
690
for(
int
i=0; i<trees_; ++i) {
691
if
(root_[i] != NULL) {
692
free_centers(root_[i]);
703
if
(indices_!=NULL) {
704
for(
int
i=0; i<trees_; ++i) {
705
if
(indices_[i]!=NULL) {
706
delete[] indices_[i];
721
void
computeNodeStatistics(KMeansNodePtr node,
int* indices,
unsigned
int
indices_length)
723
DistanceType variance = 0;
724
CentersType* mean =
new
CentersType[veclen_];
725
memoryCounter_ += int(veclen_*
sizeof(CentersType));
727
memset(mean,0,veclen_*
sizeof(CentersType));
729
for
(
unsigned
int
i=0; i<indices_length; ++i) {
730
ElementType* vec = dataset_[indices[i]];
731
for
(
size_t
j=0; j<veclen_; ++j) {
734
variance += distance_(vec, ZeroIterator<ElementType>(), veclen_);
736
float
length =
static_cast<
float
>(indices_length);
737
for
(
size_t
j=0; j<veclen_; ++j) {
738
mean[j] = cvflann::round<CentersType>( mean[j] /
static_cast<
double
>(indices_length) );
740
variance /=
static_cast<DistanceType
>( length );
741
variance -= distance_(mean, ZeroIterator<ElementType>(), veclen_);
743
DistanceType radius = 0;
744
for
(
unsigned
int
i=0; i<indices_length; ++i) {
745
DistanceType tmp = distance_(mean, dataset_[indices[i]], veclen_);
751
node->variance = variance;
752
node->radius = radius;
757
void
computeBitfieldNodeStatistics(KMeansNodePtr node,
int* indices,
758
unsigned
int
indices_length)
760
const
unsigned
int
accumulator_veclen =
static_cast<
unsigned
int
>(
761
veclen_*
sizeof(CentersType)*BITS_PER_CHAR);
763
unsigned
long
long
variance = 0ull;
764
CentersType* mean =
new
CentersType[veclen_];
765
memoryCounter_ += int(veclen_*
sizeof(CentersType));
766
unsigned
int* mean_accumulator =
new
unsigned
int[accumulator_veclen];
768
memset(mean_accumulator, 0,
sizeof(
unsigned
int)*accumulator_veclen);
770
for
(
unsigned
int
i=0; i<indices_length; ++i) {
771
variance +=
static_cast<
unsigned
long
long
>( ensureSquareDistance<Distance>(
772
distance_(dataset_[indices[i]], ZeroIterator<ElementType>(), veclen_)));
773
unsigned
char* vec = (
unsigned
char*)dataset_[indices[i]];
774
for
(
size_t
k=0, l=0; k<accumulator_veclen; k+=BITS_PER_CHAR, ++l) {
775
mean_accumulator[k] += (vec[l]) & 0x01;
776
mean_accumulator[k+1] += (vec[l]>>1) & 0x01;
777
mean_accumulator[k+2] += (vec[l]>>2) & 0x01;
778
mean_accumulator[k+3] += (vec[l]>>3) & 0x01;
779
mean_accumulator[k+4] += (vec[l]>>4) & 0x01;
780
mean_accumulator[k+5] += (vec[l]>>5) & 0x01;
781
mean_accumulator[k+6] += (vec[l]>>6) & 0x01;
782
mean_accumulator[k+7] += (vec[l]>>7) & 0x01;
785
double
cnt =
static_cast<
double
>(indices_length);
786
unsigned
char* char_mean = (
unsigned
char*)mean;
787
for
(
size_t
k=0, l=0; k<accumulator_veclen; k+=BITS_PER_CHAR, ++l) {
788
char_mean[l] =
static_cast<
unsigned
char
>(
789
(((int)(0.5 + (
double)(mean_accumulator[k]) / cnt)))
790
| (((int)(0.5 + (
double)(mean_accumulator[k+1]) / cnt))<<1)
791
| (((int)(0.5 + (
double)(mean_accumulator[k+2]) / cnt))<<2)
792
| (((int)(0.5 + (
double)(mean_accumulator[k+3]) / cnt))<<3)
793
| (((int)(0.5 + (
double)(mean_accumulator[k+4]) / cnt))<<4)
794
| (((int)(0.5 + (
double)(mean_accumulator[k+5]) / cnt))<<5)
795
| (((int)(0.5 + (
double)(mean_accumulator[k+6]) / cnt))<<6)
796
| (((int)(0.5 + (
double)(mean_accumulator[k+7]) / cnt))<<7));
798
variance =
static_cast<
unsigned
long
long
>(
799
0.5 +
static_cast<
double
>(variance) /
static_cast<
double
>(indices_length));
800
variance -=
static_cast<
unsigned
long
long
>(
801
ensureSquareDistance<Distance>(
802
distance_(mean, ZeroIterator<ElementType>(), veclen_)));
804
DistanceType radius = 0;
805
for
(
unsigned
int
i=0; i<indices_length; ++i) {
806
DistanceType tmp = distance_(mean, dataset_[indices[i]], veclen_);
812
node->variance =
static_cast<DistanceType
>(variance);
813
node->radius = radius;
816
delete[] mean_accumulator;
820
void
computeDnaNodeStatistics(KMeansNodePtr node,
int* indices,
821
unsigned
int
indices_length)
823
const
unsigned
int
histos_veclen =
static_cast<
unsigned
int
>(
824
veclen_*
sizeof(CentersType)*(HISTOS_PER_BASE*BASE_PER_CHAR));
826
unsigned
long
long
variance = 0ull;
827
unsigned
int* histograms =
new
unsigned
int[histos_veclen];
828
memset(histograms, 0,
sizeof(
unsigned
int)*histos_veclen);
830
for
(
unsigned
int
i=0; i<indices_length; ++i) {
831
variance +=
static_cast<
unsigned
long
long
>( ensureSquareDistance<Distance>(
832
distance_(dataset_[indices[i]], ZeroIterator<ElementType>(), veclen_)));
834
unsigned
char* vec = (
unsigned
char*)dataset_[indices[i]];
835
for
(
size_t
k=0, l=0; k<histos_veclen; k+=HISTOS_PER_BASE*BASE_PER_CHAR, ++l) {
836
histograms[k + ((vec[l]) & 0x03)]++;
837
histograms[k + 4 + ((vec[l]>>2) & 0x03)]++;
838
histograms[k + 8 + ((vec[l]>>4) & 0x03)]++;
839
histograms[k +12 + ((vec[l]>>6) & 0x03)]++;
843
CentersType* mean =
new
CentersType[veclen_];
844
memoryCounter_ += int(veclen_*
sizeof(CentersType));
845
unsigned
char* char_mean = (
unsigned
char*)mean;
846
unsigned
int* h = histograms;
847
for
(
size_t
k=0, l=0; k<histos_veclen; k+=HISTOS_PER_BASE*BASE_PER_CHAR, ++l) {
848
char_mean[l] = (h[k] > h[k+1] ? h[k+2] > h[k+3] ? h[k] > h[k+2] ? 0x00 : 0x10
849
: h[k] > h[k+3] ? 0x00 : 0x11
850
: h[k+2] > h[k+3] ? h[k+1] > h[k+2] ? 0x01 : 0x10
851
: h[k+1] > h[k+3] ? 0x01 : 0x11)
852
| (h[k+4]>h[k+5] ? h[k+6] > h[k+7] ? h[k+4] > h[k+6] ? 0x00 : 0x1000
853
: h[k+4] > h[k+7] ? 0x00 : 0x1100
854
: h[k+6] > h[k+7] ? h[k+5] > h[k+6] ? 0x0100 : 0x1000
855
: h[k+5] > h[k+7] ? 0x0100 : 0x1100)
856
| (h[k+8]>h[k+9] ? h[k+10]>h[k+11] ? h[k+8] >h[k+10] ? 0x00 : 0x100000
857
: h[k+8] >h[k+11] ? 0x00 : 0x110000
858
: h[k+10]>h[k+11] ? h[k+9] >h[k+10] ? 0x010000 : 0x100000
859
: h[k+9] >h[k+11] ? 0x010000 : 0x110000)
860
| (h[k+12]>h[k+13] ? h[k+14]>h[k+15] ? h[k+12] >h[k+14] ? 0x00 : 0x10000000
861
: h[k+12] >h[k+15] ? 0x00 : 0x11000000
862
: h[k+14]>h[k+15] ? h[k+13] >h[k+14] ? 0x01000000 : 0x10000000
863
: h[k+13] >h[k+15] ? 0x01000000 : 0x11000000);
865
variance =
static_cast<
unsigned
long
long
>(
866
0.5 +
static_cast<
double
>(variance) /
static_cast<
double
>(indices_length));
867
variance -=
static_cast<
unsigned
long
long
>(
868
ensureSquareDistance<Distance>(
869
distance_(mean, ZeroIterator<ElementType>(), veclen_)));
871
DistanceType radius = 0;
872
for
(
unsigned
int
i=0; i<indices_length; ++i) {
873
DistanceType tmp = distance_(mean, dataset_[indices[i]], veclen_);
879
node->variance =
static_cast<DistanceType
>(variance);
880
node->radius = radius;
887
template<
typename
DistType>
888
void
computeNodeStatistics(KMeansNodePtr node,
int* indices,
889
unsigned
int
indices_length,
890
const
DistType* identifier)
893
computeNodeStatistics(node, indices, indices_length);
896
void
computeNodeStatistics(KMeansNodePtr node,
int* indices,
897
unsigned
int
indices_length,
898
const
cvflann::HammingLUT* identifier)
901
computeBitfieldNodeStatistics(node, indices, indices_length);
904
void
computeNodeStatistics(KMeansNodePtr node,
int* indices,
905
unsigned
int
indices_length,
906
const
cvflann::Hamming<unsigned char>* identifier)
909
computeBitfieldNodeStatistics(node, indices, indices_length);
912
void
computeNodeStatistics(KMeansNodePtr node,
int* indices,
913
unsigned
int
indices_length,
914
const
cvflann::Hamming2<unsigned char>* identifier)
917
computeBitfieldNodeStatistics(node, indices, indices_length);
920
void
computeNodeStatistics(KMeansNodePtr node,
int* indices,
921
unsigned
int
indices_length,
922
const
cvflann::DNAmmingLUT* identifier)
925
computeDnaNodeStatistics(node, indices, indices_length);
928
void
computeNodeStatistics(KMeansNodePtr node,
int* indices,
929
unsigned
int
indices_length,
930
const
cvflann::DNAmming2<unsigned char>* identifier)
933
computeDnaNodeStatistics(node, indices, indices_length);
937
void
refineClustering(
int* indices,
int
indices_length,
int
branching, CentersType** centers,
938
std::vector<DistanceType>& radiuses,
int* belongs_to,
int* count)
941
Matrix<double> dcenters(dcenters_buf.data(), branching, veclen_);
943
bool
converged =
false;
945
while
(!converged && iteration<iterations_) {
950
for
(
int
i=0; i<branching; ++i) {
951
memset(dcenters[i],0,
sizeof(
double)*veclen_);
954
for
(
int
i=0; i<indices_length; ++i) {
955
ElementType* vec = dataset_[indices[i]];
956
double* center = dcenters[belongs_to[i]];
957
for
(
size_t
k=0; k<veclen_; ++k) {
961
for
(
int
i=0; i<branching; ++i) {
963
for
(
size_t
k=0; k<veclen_; ++k) {
964
dcenters[i][k] /= cnt;
968
std::vector<int> new_centroids(indices_length);
969
std::vector<DistanceType> sq_dists(indices_length);
972
KMeansDistanceComputer<Matrix<double> > invoker(
973
distance_, dataset_, branching, indices, dcenters, veclen_, new_centroids, sq_dists);
976
for
(
int
i=0; i < (int)indices_length; ++i) {
977
DistanceType sq_dist(sq_dists[i]);
978
int
new_centroid(new_centroids[i]);
979
if
(sq_dist > radiuses[new_centroid]) {
980
radiuses[new_centroid] = sq_dist;
982
if
(new_centroid != belongs_to[i]) {
983
count[belongs_to[i]]--;
984
count[new_centroid]++;
985
belongs_to[i] = new_centroid;
990
for
(
int
i=0; i<branching; ++i) {
994
int
j = (i+1)%branching;
995
while
(count[j]<=1) {
999
for
(
int
k=0; k<indices_length; ++k) {
1000
if
(belongs_to[k]==j) {
1002
if
( distance_(dataset_[indices[k]], dcenters[j], veclen_) == radiuses[j] ) {
1015
for
(
int
i=0; i<branching; ++i) {
1016
centers[i] =
new
CentersType[veclen_];
1017
memoryCounter_ += (int)(veclen_*
sizeof(CentersType));
1018
for
(
size_t
k=0; k<veclen_; ++k) {
1019
centers[i][k] = (CentersType)dcenters[i][k];
1025
void
refineBitfieldClustering(
int* indices,
int
indices_length,
int
branching, CentersType** centers,
1026
std::vector<DistanceType>& radiuses,
int* belongs_to,
int* count)
1028
for
(
int
i=0; i<branching; ++i) {
1029
centers[i] =
new
CentersType[veclen_];
1030
memoryCounter_ += (int)(veclen_*
sizeof(CentersType));
1033
const
unsigned
int
accumulator_veclen =
static_cast<
unsigned
int
>(
1034
veclen_*
sizeof(ElementType)*BITS_PER_CHAR);
1036
Matrix<unsigned int> dcenters(dcenters_buf.data(), branching, accumulator_veclen);
1038
bool
converged =
false;
1040
while
(!converged && iteration<iterations_) {
1045
for
(
int
i=0; i<branching; ++i) {
1046
memset(dcenters[i],0,
sizeof(
unsigned
int)*accumulator_veclen);
1049
for
(
int
i=0; i<indices_length; ++i) {
1050
unsigned
char* vec = (
unsigned
char*)dataset_[indices[i]];
1051
unsigned
int* dcenter = dcenters[belongs_to[i]];
1052
for
(
size_t
k=0, l=0; k<accumulator_veclen; k+=BITS_PER_CHAR, ++l) {
1053
dcenter[k] += (vec[l]) & 0x01;
1054
dcenter[k+1] += (vec[l]>>1) & 0x01;
1055
dcenter[k+2] += (vec[l]>>2) & 0x01;
1056
dcenter[k+3] += (vec[l]>>3) & 0x01;
1057
dcenter[k+4] += (vec[l]>>4) & 0x01;
1058
dcenter[k+5] += (vec[l]>>5) & 0x01;
1059
dcenter[k+6] += (vec[l]>>6) & 0x01;
1060
dcenter[k+7] += (vec[l]>>7) & 0x01;
1063
for
(
int
i=0; i<branching; ++i) {
1064
double
cnt =
static_cast<
double
>(count[i]);
1065
unsigned
int* dcenter = dcenters[i];
1066
unsigned
char* charCenter = (
unsigned
char*)centers[i];
1067
for
(
size_t
k=0, l=0; k<accumulator_veclen; k+=BITS_PER_CHAR, ++l) {
1068
charCenter[l] =
static_cast<
unsigned
char
>(
1069
(((int)(0.5 + (
double)(dcenter[k]) / cnt)))
1070
| (((int)(0.5 + (
double)(dcenter[k+1]) / cnt))<<1)
1071
| (((int)(0.5 + (
double)(dcenter[k+2]) / cnt))<<2)
1072
| (((int)(0.5 + (
double)(dcenter[k+3]) / cnt))<<3)
1073
| (((int)(0.5 + (
double)(dcenter[k+4]) / cnt))<<4)
1074
| (((int)(0.5 + (
double)(dcenter[k+5]) / cnt))<<5)
1075
| (((int)(0.5 + (
double)(dcenter[k+6]) / cnt))<<6)
1076
| (((int)(0.5 + (
double)(dcenter[k+7]) / cnt))<<7));
1080
std::vector<int> new_centroids(indices_length);
1081
std::vector<DistanceType> dists(indices_length);
1084
KMeansDistanceComputer<ElementType**> invoker(
1085
distance_, dataset_, branching, indices, centers, veclen_, new_centroids, dists);
1088
for
(
int
i=0; i < indices_length; ++i) {
1089
DistanceType dist(dists[i]);
1090
int
new_centroid(new_centroids[i]);
1091
if
(dist > radiuses[new_centroid]) {
1092
radiuses[new_centroid] = dist;
1094
if
(new_centroid != belongs_to[i]) {
1095
count[belongs_to[i]]--;
1096
count[new_centroid]++;
1097
belongs_to[i] = new_centroid;
1102
for
(
int
i=0; i<branching; ++i) {
1106
int
j = (i+1)%branching;
1107
while
(count[j]<=1) {
1108
j = (j+1)%branching;
1111
for
(
int
k=0; k<indices_length; ++k) {
1112
if
(belongs_to[k]==j) {
1114
if
( distance_(dataset_[indices[k]], centers[j], veclen_) == radiuses[j] ) {
1129
void
refineDnaClustering(
int* indices,
int
indices_length,
int
branching, CentersType** centers,
1130
std::vector<DistanceType>& radiuses,
int* belongs_to,
int* count)
1132
for
(
int
i=0; i<branching; ++i) {
1133
centers[i] =
new
CentersType[veclen_];
1134
memoryCounter_ += (int)(veclen_*
sizeof(CentersType));
1137
const
unsigned
int
histos_veclen =
static_cast<
unsigned
int
>(
1138
veclen_*
sizeof(CentersType)*(HISTOS_PER_BASE*BASE_PER_CHAR));
1140
Matrix<unsigned int> histos(histos_buf.data(), branching, histos_veclen);
1142
bool
converged =
false;
1144
while
(!converged && iteration<iterations_) {
1149
for
(
int
i=0; i<branching; ++i) {
1150
memset(histos[i],0,
sizeof(
unsigned
int)*histos_veclen);
1153
for
(
int
i=0; i<indices_length; ++i) {
1154
unsigned
char* vec = (
unsigned
char*)dataset_[indices[i]];
1155
unsigned
int* h = histos[belongs_to[i]];
1156
for
(
size_t
k=0, l=0; k<histos_veclen; k+=HISTOS_PER_BASE*BASE_PER_CHAR, ++l) {
1157
h[k + ((vec[l]) & 0x03)]++;
1158
h[k + 4 + ((vec[l]>>2) & 0x03)]++;
1159
h[k + 8 + ((vec[l]>>4) & 0x03)]++;
1160
h[k +12 + ((vec[l]>>6) & 0x03)]++;
1163
for
(
int
i=0; i<branching; ++i) {
1164
unsigned
int* h = histos[i];
1165
unsigned
char* charCenter = (
unsigned
char*)centers[i];
1166
for
(
size_t
k=0, l=0; k<histos_veclen; k+=HISTOS_PER_BASE*BASE_PER_CHAR, ++l) {
1167
charCenter[l]= (h[k] > h[k+1] ? h[k+2] > h[k+3] ? h[k] > h[k+2] ? 0x00 : 0x10
1168
: h[k] > h[k+3] ? 0x00 : 0x11
1169
: h[k+2] > h[k+3] ? h[k+1] > h[k+2] ? 0x01 : 0x10
1170
: h[k+1] > h[k+3] ? 0x01 : 0x11)
1171
| (h[k+4]>h[k+5] ? h[k+6] > h[k+7] ? h[k+4] > h[k+6] ? 0x00 : 0x1000
1172
: h[k+4] > h[k+7] ? 0x00 : 0x1100
1173
: h[k+6] > h[k+7] ? h[k+5] > h[k+6] ? 0x0100 : 0x1000
1174
: h[k+5] > h[k+7] ? 0x0100 : 0x1100)
1175
| (h[k+8]>h[k+9] ? h[k+10]>h[k+11] ? h[k+8] >h[k+10] ? 0x00 : 0x100000
1176
: h[k+8] >h[k+11] ? 0x00 : 0x110000
1177
: h[k+10]>h[k+11] ? h[k+9] >h[k+10] ? 0x010000 : 0x100000
1178
: h[k+9] >h[k+11] ? 0x010000 : 0x110000)
1179
| (h[k+12]>h[k+13] ? h[k+14]>h[k+15] ? h[k+12] >h[k+14] ? 0x00 : 0x10000000
1180
: h[k+12] >h[k+15] ? 0x00 : 0x11000000
1181
: h[k+14]>h[k+15] ? h[k+13] >h[k+14] ? 0x01000000 : 0x10000000
1182
: h[k+13] >h[k+15] ? 0x01000000 : 0x11000000);
1186
std::vector<int> new_centroids(indices_length);
1187
std::vector<DistanceType> dists(indices_length);
1190
KMeansDistanceComputer<ElementType**> invoker(
1191
distance_, dataset_, branching, indices, centers, veclen_, new_centroids, dists);
1194
for
(
int
i=0; i < indices_length; ++i) {
1195
DistanceType dist(dists[i]);
1196
int
new_centroid(new_centroids[i]);
1197
if
(dist > radiuses[new_centroid]) {
1198
radiuses[new_centroid] = dist;
1200
if
(new_centroid != belongs_to[i]) {
1201
count[belongs_to[i]]--;
1202
count[new_centroid]++;
1203
belongs_to[i] = new_centroid;
1208
for
(
int
i=0; i<branching; ++i) {
1212
int
j = (i+1)%branching;
1213
while
(count[j]<=1) {
1214
j = (j+1)%branching;
1217
for
(
int
k=0; k<indices_length; ++k) {
1218
if
(belongs_to[k]==j) {
1220
if
( distance_(dataset_[indices[k]], centers[j], veclen_) == radiuses[j] ) {
1235
void
computeSubClustering(KMeansNodePtr node,
int* indices,
int
indices_length,
1236
int
branching,
int
level, CentersType** centers,
1237
std::vector<DistanceType>& radiuses,
int* belongs_to,
int* count)
1240
node->childs = pool_.allocate<KMeansNodePtr>(branching);
1243
for
(
int
c=0; c<branching; ++c) {
1246
DistanceType variance = 0;
1247
DistanceType mean_radius =0;
1248
for
(
int
i=0; i<indices_length; ++i) {
1249
if
(belongs_to[i]==c) {
1250
DistanceType d = distance_(dataset_[indices[i]], ZeroIterator<ElementType>(), veclen_);
1252
mean_radius +=
static_cast<DistanceType
>(
sqrt(d) );
1254
std::swap(belongs_to[i],belongs_to[end]);
1260
variance -= distance_(centers[c], ZeroIterator<ElementType>(), veclen_);
1262
node->childs[c] = pool_.allocate<KMeansNode>();
1263
std::memset(node->childs[c], 0,
sizeof(KMeansNode));
1264
node->childs[c]->radius = radiuses[c];
1265
node->childs[c]->pivot = centers[c];
1266
node->childs[c]->variance = variance;
1267
node->childs[c]->mean_radius = mean_radius;
1268
computeClustering(node->childs[c],indices+start, end-start, branching, level+1);
1274
void
computeAnyBitfieldSubClustering(KMeansNodePtr node,
int* indices,
int
indices_length,
1275
int
branching,
int
level, CentersType** centers,
1276
std::vector<DistanceType>& radiuses,
int* belongs_to,
int* count)
1279
node->childs = pool_.allocate<KMeansNodePtr>(branching);
1282
for
(
int
c=0; c<branching; ++c) {
1285
unsigned
long
long
variance = 0ull;
1286
DistanceType mean_radius =0;
1287
for
(
int
i=0; i<indices_length; ++i) {
1288
if
(belongs_to[i]==c) {
1289
DistanceType d = distance_(dataset_[indices[i]], ZeroIterator<ElementType>(), veclen_);
1290
variance +=
static_cast<
unsigned
long
long
>( ensureSquareDistance<Distance>(d) );
1291
mean_radius += ensureSimpleDistance<Distance>(d);
1293
std::swap(belongs_to[i],belongs_to[end]);
1297
mean_radius =
static_cast<DistanceType
>(
1298
0.5f +
static_cast<
float
>(mean_radius) /
static_cast<
float
>(s));
1299
variance =
static_cast<
unsigned
long
long
>(
1300
0.5 +
static_cast<
double
>(variance) /
static_cast<
double
>(s));
1301
variance -=
static_cast<
unsigned
long
long
>(
1302
ensureSquareDistance<Distance>(
1303
distance_(centers[c], ZeroIterator<ElementType>(), veclen_)));
1305
node->childs[c] = pool_.allocate<KMeansNode>();
1306
std::memset(node->childs[c], 0,
sizeof(KMeansNode));
1307
node->childs[c]->radius = radiuses[c];
1308
node->childs[c]->pivot = centers[c];
1309
node->childs[c]->variance =
static_cast<DistanceType
>(variance);
1310
node->childs[c]->mean_radius = mean_radius;
1311
computeClustering(node->childs[c],indices+start, end-start, branching, level+1);
1317
template<
typename
DistType>
1318
void
refineAndSplitClustering(
1319
KMeansNodePtr node,
int* indices,
int
indices_length,
int
branching,
1320
int
level, CentersType** centers, std::vector<DistanceType>& radiuses,
1321
int* belongs_to,
int* count,
const
DistType* identifier)
1324
refineClustering(indices, indices_length, branching, centers, radiuses, belongs_to, count);
1326
computeSubClustering(node, indices, indices_length, branching,
1327
level, centers, radiuses, belongs_to, count);
1375
void
refineAndSplitClustering(
1376
KMeansNodePtr node,
int* indices,
int
indices_length,
int
branching,
1377
int
level, CentersType** centers, std::vector<DistanceType>& radiuses,
1378
int* belongs_to,
int* count,
const
cvflann::HammingLUT* identifier)
1381
refineBitfieldClustering(
1382
indices, indices_length, branching, centers, radiuses, belongs_to, count);
1384
computeAnyBitfieldSubClustering(node, indices, indices_length, branching,
1385
level, centers, radiuses, belongs_to, count);
1389
void
refineAndSplitClustering(
1390
KMeansNodePtr node,
int* indices,
int
indices_length,
int
branching,
1391
int
level, CentersType** centers, std::vector<DistanceType>& radiuses,
1392
int* belongs_to,
int* count,
const
cvflann::Hamming<unsigned char>* identifier)
1395
refineBitfieldClustering(
1396
indices, indices_length, branching, centers, radiuses, belongs_to, count);
1398
computeAnyBitfieldSubClustering(node, indices, indices_length, branching,
1399
level, centers, radiuses, belongs_to, count);
1403
void
refineAndSplitClustering(
1404
KMeansNodePtr node,
int* indices,
int
indices_length,
int
branching,
1405
int
level, CentersType** centers, std::vector<DistanceType>& radiuses,
1406
int* belongs_to,
int* count,
const
cvflann::Hamming2<unsigned char>* identifier)
1409
refineBitfieldClustering(
1410
indices, indices_length, branching, centers, radiuses, belongs_to, count);
1412
computeAnyBitfieldSubClustering(node, indices, indices_length, branching,
1413
level, centers, radiuses, belongs_to, count);
1417
void
refineAndSplitClustering(
1418
KMeansNodePtr node,
int* indices,
int
indices_length,
int
branching,
1419
int
level, CentersType** centers, std::vector<DistanceType>& radiuses,
1420
int* belongs_to,
int* count,
const
cvflann::DNAmmingLUT* identifier)
1423
refineDnaClustering(
1424
indices, indices_length, branching, centers, radiuses, belongs_to, count);
1426
computeAnyBitfieldSubClustering(node, indices, indices_length, branching,
1427
level, centers, radiuses, belongs_to, count);
1431
void
refineAndSplitClustering(
1432
KMeansNodePtr node,
int* indices,
int
indices_length,
int
branching,
1433
int
level, CentersType** centers, std::vector<DistanceType>& radiuses,
1434
int* belongs_to,
int* count,
const
cvflann::DNAmming2<unsigned char>* identifier)
1437
refineDnaClustering(
1438
indices, indices_length, branching, centers, radiuses, belongs_to, count);
1440
computeAnyBitfieldSubClustering(node, indices, indices_length, branching,
1441
level, centers, radiuses, belongs_to, count);
1456
void
computeClustering(KMeansNodePtr node,
int* indices,
int
indices_length,
int
branching,
int
level)
1458
node->size = indices_length;
1459
node->level = level;
1461
if
(indices_length < branching) {
1462
node->indices = indices;
1463
std::sort(node->indices,node->indices+indices_length);
1464
node->childs = NULL;
1469
int* centers_idx = centers_idx_buf.data();
1471
(this->*chooseCenters)(branching, indices, indices_length, centers_idx, centers_length);
1473
if
(centers_length<branching) {
1474
node->indices = indices;
1475
std::sort(node->indices,node->indices+indices_length);
1476
node->childs = NULL;
1481
std::vector<DistanceType> radiuses(branching);
1483
int* count = count_buf.data();
1484
for
(
int
i=0; i<branching; ++i) {
1491
int* belongs_to = belongs_to_buf.data();
1492
for
(
int
i=0; i<indices_length; ++i) {
1493
DistanceType sq_dist = distance_(dataset_[indices[i]], dataset_[centers_idx[0]], veclen_);
1495
for
(
int
j=1; j<branching; ++j) {
1496
DistanceType new_sq_dist = distance_(dataset_[indices[i]], dataset_[centers_idx[j]], veclen_);
1497
if
(sq_dist>new_sq_dist) {
1499
sq_dist = new_sq_dist;
1502
if
(sq_dist>radiuses[belongs_to[i]]) {
1503
radiuses[belongs_to[i]] = sq_dist;
1505
count[belongs_to[i]]++;
1508
CentersType** centers =
new
CentersType*[branching];
1510
Distance* dummy = NULL;
1511
refineAndSplitClustering(node, indices, indices_length, branching, level,
1512
centers, radiuses, belongs_to, count, dummy);
1531
void
findNN(KMeansNodePtr node, ResultSet<DistanceType>& result,
const
ElementType* vec,
int& checks,
int
maxChecks,
1532
Heap<BranchSt>* heap)
1536
DistanceType bsq = distance_(vec, node->pivot, veclen_);
1537
DistanceType rsq = node->radius;
1538
DistanceType wsq = result.worstDist();
1540
if
(isSquareDistance<Distance>())
1542
DistanceType val = bsq-rsq-wsq;
1543
if
((val>0) && (val*val > 4*rsq*wsq))
1553
if
(node->childs==NULL) {
1554
if
((checks>=maxChecks) && result.full()) {
1557
checks += node->size;
1558
for
(
int
i=0; i<node->size; ++i) {
1559
int
index = node->indices[i];
1560
DistanceType dist = distance_(dataset_[index], vec, veclen_);
1561
result.addPoint(dist, index);
1565
DistanceType* domain_distances =
new
DistanceType[branching_];
1566
int
closest_center = exploreNodeBranches(node, vec, domain_distances, heap);
1567
delete[] domain_distances;
1568
findNN(node->childs[closest_center],result,vec, checks, maxChecks, heap);
1580
int
exploreNodeBranches(KMeansNodePtr node,
const
ElementType* q, DistanceType* domain_distances, Heap<BranchSt>* heap)
1584
domain_distances[best_index] = distance_(q, node->childs[best_index]->pivot, veclen_);
1585
for
(
int
i=1; i<branching_; ++i) {
1586
domain_distances[i] = distance_(q, node->childs[i]->pivot, veclen_);
1587
if
(domain_distances[i]<domain_distances[best_index]) {
1593
for
(
int
i=0; i<branching_; ++i) {
1594
if
(i != best_index) {
1595
domain_distances[i] -= cvflann::round<DistanceType>(
1596
cb_index_*node->childs[i]->variance );
1602
heap->insert(BranchSt(node->childs[i],domain_distances[i]));
1613
void
findExactNN(KMeansNodePtr node, ResultSet<DistanceType>& result,
const
ElementType* vec)
1617
DistanceType bsq = distance_(vec, node->pivot, veclen_);
1618
DistanceType rsq = node->radius;
1619
DistanceType wsq = result.worstDist();
1621
if
(isSquareDistance<Distance>())
1623
DistanceType val = bsq-rsq-wsq;
1624
if
((val>0) && (val*val > 4*rsq*wsq))
1635
if
(node->childs==NULL) {
1636
for
(
int
i=0; i<node->size; ++i) {
1637
int
index = node->indices[i];
1638
DistanceType dist = distance_(dataset_[index], vec, veclen_);
1639
result.addPoint(dist, index);
1643
int* sort_indices =
new
int[branching_];
1645
getCenterOrdering(node, vec, sort_indices);
1647
for
(
int
i=0; i<branching_; ++i) {
1648
findExactNN(node->childs[sort_indices[i]],result,vec);
1651
delete[] sort_indices;
1661
void
getCenterOrdering(KMeansNodePtr node,
const
ElementType* q,
int* sort_indices)
1663
DistanceType* domain_distances =
new
DistanceType[branching_];
1664
for
(
int
i=0; i<branching_; ++i) {
1665
DistanceType dist = distance_(q, node->childs[i]->pivot, veclen_);
1668
while
(domain_distances[j]<dist && j<i)
1670
for
(
int
k=i; k>j; --k) {
1671
domain_distances[k] = domain_distances[k-1];
1672
sort_indices[k] = sort_indices[k-1];
1674
domain_distances[j] = dist;
1675
sort_indices[j] = i;
1677
delete[] domain_distances;
1685
DistanceType getDistanceToBorder(DistanceType* p, DistanceType* c, DistanceType* q)
1687
DistanceType sum = 0;
1688
DistanceType sum2 = 0;
1690
for
(
int
i=0; i<veclen_; ++i) {
1691
DistanceType t = c[i]-p[i];
1692
sum += t*(q[i]-(c[i]+p[i])/2);
1696
return
sum*sum/sum2;
1709
int
getMinVarianceClusters(KMeansNodePtr root, KMeansNodePtr* clusters,
int
clusters_length, DistanceType& varianceValue)
1711
int
clusterCount = 1;
1714
DistanceType meanVariance = root->variance*root->size;
1716
while
(clusterCount<clusters_length) {
1718
int
splitIndex = -1;
1720
for
(
int
i=0; i<clusterCount; ++i) {
1721
if
(clusters[i]->childs != NULL) {
1723
DistanceType variance = meanVariance - clusters[i]->variance*clusters[i]->size;
1725
for
(
int
j=0; j<branching_; ++j) {
1726
variance += clusters[i]->childs[j]->variance*clusters[i]->childs[j]->size;
1728
if
(variance<minVariance) {
1729
minVariance = variance;
1735
if
(splitIndex==-1)
break;
1736
if
( (branching_+clusterCount-1) > clusters_length)
break;
1738
meanVariance = minVariance;
1741
KMeansNodePtr toSplit = clusters[splitIndex];
1742
clusters[splitIndex] = toSplit->childs[0];
1743
for
(
int
i=1; i<branching_; ++i) {
1744
clusters[clusterCount++] = toSplit->childs[i];
1748
varianceValue = meanVariance/root->size;
1749
return
clusterCount;
1763
flann_centers_init_t centers_init_;
1776
const
Matrix<ElementType> dataset_;
1779
IndexParams index_params_;
1794
KMeansNodePtr* root_;
1809
PooledAllocator pool_;
Automatically Allocated Buffer Class
Definition:
utility.hpp:102
Base class for parallel data processors
Definition:
utility.hpp:577
Template class specifying a continuous subsequence (slice) of a sequence.
Definition:
core/types.hpp:590
CV_EXPORTS_W void max(InputArray src1, InputArray src2, OutputArray dst)
Calculates per-element maximum of two arrays or an array and a scalar.
CV_EXPORTS_W void sqrt(InputArray src, OutputArray dst)
Calculates a square root of array elements.
CV_EXPORTS_W void sort(InputArray src, OutputArray dst, int flags)
Sorts each row or each column of a matrix.
CV_EXPORTS_W void min(InputArray src1, InputArray src2, OutputArray dst)
Calculates per-element minimum of two arrays or an array and a scalar.
CV_EXPORTS void parallel_for_(const Range &range, const ParallelLoopBody &body, double nstripes=-1.)
Parallel data processor
#define CV_Assert(expr)
Checks a condition at runtime and throws exception if it fails
Definition:
base.hpp:342
#define CV_DbgAssert(expr)
Definition:
base.hpp:375
CV_EXPORTS void swap(Mat &a, Mat &b)
Swaps two matrices