OpenCV 4.5.3(日本語機械翻訳)
hierarchical_clustering_index.h
1 /***********************************************************************
2 * Software License Agreement (BSD License)
3 *
4 * Copyright 2008-2011 Marius Muja (mariusm@cs.ubc.ca). All rights reserved.
5 * Copyright 2008-2011 David G. Lowe (lowe@cs.ubc.ca). All rights reserved.
6 *
7 * THE BSD LICENSE
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 *
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
20 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
21 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
22 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
23 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
24 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
28 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 *************************************************************************/
30
31 #ifndef OPENCV_FLANN_HIERARCHICAL_CLUSTERING_INDEX_H_
32 #define OPENCV_FLANN_HIERARCHICAL_CLUSTERING_INDEX_H_
33
35
36 #include <algorithm>
37 #include <map>
38 #include <limits>
39 #include <cmath>
40
41 #include "general.h"
42 #include "nn_index.h"
43 #include "dist.h"
44 #include "matrix.h"
45 #include "result_set.h"
46 #include "heap.h"
47 #include "allocator.h"
48 #include "random.h"
49 #include "saving.h"
50
51
52 namespace cvflann
53{
54
55 struct HierarchicalClusteringIndexParams : public IndexParams
56{
57 HierarchicalClusteringIndexParams(int branching = 32,
58 flann_centers_init_t centers_init = FLANN_CENTERS_RANDOM,
59 int trees = 4, int leaf_size = 100)
60 {
61 (*this)["algorithm"] = FLANN_INDEX_HIERARCHICAL;
62 // The branching factor used in the hierarchical clustering
63 (*this)["branching"] = branching;
64 // Algorithm used for picking the initial cluster centers
65 (*this)["centers_init"] = centers_init;
66 // number of parallel trees to build
67 (*this)["trees"] = trees;
68 // maximum leaf size
69 (*this)["leaf_size"] = leaf_size;
70 }
71};
72
73
80 template <typename Distance>
81 class HierarchicalClusteringIndex : public NNIndex<Distance>
82{
83 public:
84 typedef typename Distance::ElementType ElementType;
85 typedef typename Distance::ResultType DistanceType;
86
87 private:
88
89
90 typedef void (HierarchicalClusteringIndex::* centersAlgFunction)(int, int*, int, int*, int&);
91
95 centersAlgFunction chooseCenters;
96
97
98
109 void chooseCentersRandom(int k, int* dsindices, int indices_length, int* centers, int& centers_length)
110 {
111 UniqueRandom r(indices_length);
112
113 int index;
114 for (index=0; index<k; ++index) {
115 bool duplicate = true;
116 int rnd;
117 while (duplicate) {
118 duplicate = false;
119 rnd = r.next();
120 if (rnd<0) {
121 centers_length = index;
122 return;
123 }
124
125 centers[index] = dsindices[rnd];
126
127 for (int j=0; j<index; ++j) {
128 DistanceType sq = distance(dataset[centers[index]], dataset[centers[j]], dataset.cols);
129 if (sq<1e-16) {
130 duplicate = true;
131 }
132 }
133 }
134 }
135
136 centers_length = index;
137 }
138
139
150 void chooseCentersGonzales(int k, int* dsindices, int indices_length, int* centers, int& centers_length)
151 {
152 int n = indices_length;
153
154 int rnd = rand_int(n);
155 CV_DbgAssert(rnd >=0 && rnd < n);
156
157 centers[0] = dsindices[rnd];
158
159 int index;
160 for (index=1; index<k; ++index) {
161
162 int best_index = -1;
163 DistanceType best_val = 0;
164 for (int j=0; j<n; ++j) {
165 DistanceType dist = distance(dataset[centers[0]],dataset[dsindices[j]],dataset.cols);
166 for (int i=1; i<index; ++i) {
167 DistanceType tmp_dist = distance(dataset[centers[i]],dataset[dsindices[j]],dataset.cols);
168 if (tmp_dist<dist) {
169 dist = tmp_dist;
170 }
171 }
172 if (dist>best_val) {
173 best_val = dist;
174 best_index = j;
175 }
176 }
177 if (best_index!=-1) {
178 centers[index] = dsindices[best_index];
179 }
180 else {
181 break;
182 }
183 }
184 centers_length = index;
185 }
186
187
201 void chooseCentersKMeanspp(int k, int* dsindices, int indices_length, int* centers, int& centers_length)
202 {
203 int n = indices_length;
204
205 double currentPot = 0;
206 DistanceType* closestDistSq = new DistanceType[n];
207
208 // Choose one random center and set the closestDistSq values
209 int index = rand_int(n);
210 CV_DbgAssert(index >=0 && index < n);
211 centers[0] = dsindices[index];
212
213 // Computing distance^2 will have the advantage of even higher probability further to pick new centers
214 // far from previous centers (and this complies to "k-means++: the advantages of careful seeding" article)
215 for (int i = 0; i < n; i++) {
216 closestDistSq[i] = distance(dataset[dsindices[i]], dataset[dsindices[index]], dataset.cols);
217 closestDistSq[i] = ensureSquareDistance<Distance>( closestDistSq[i] );
218 currentPot += closestDistSq[i];
219 }
220
221
222 const int numLocalTries = 1;
223
224 // Choose each center
225 int centerCount;
226 for (centerCount = 1; centerCount < k; centerCount++) {
227
228 // Repeat several trials
229 double bestNewPot = -1;
230 int bestNewIndex = 0;
231 for (int localTrial = 0; localTrial < numLocalTries; localTrial++) {
232
233 // Choose our center - have to be slightly careful to return a valid answer even accounting
234 // for possible rounding errors
235 double randVal = rand_double(currentPot);
236 for (index = 0; index < n-1; index++) {
237 if (randVal <= closestDistSq[index]) break;
238 else randVal -= closestDistSq[index];
239 }
240
241 // Compute the new potential
242 double newPot = 0;
243 for (int i = 0; i < n; i++) {
244 DistanceType dist = distance(dataset[dsindices[i]], dataset[dsindices[index]], dataset.cols);
245 newPot += std::min( ensureSquareDistance<Distance>(dist), closestDistSq[i] );
246 }
247
248 // Store the best result
249 if ((bestNewPot < 0)||(newPot < bestNewPot)) {
250 bestNewPot = newPot;
251 bestNewIndex = index;
252 }
253 }
254
255 // Add the appropriate center
256 centers[centerCount] = dsindices[bestNewIndex];
257 currentPot = bestNewPot;
258 for (int i = 0; i < n; i++) {
259 DistanceType dist = distance(dataset[dsindices[i]], dataset[dsindices[bestNewIndex]], dataset.cols);
260 closestDistSq[i] = std::min( ensureSquareDistance<Distance>(dist), closestDistSq[i] );
261 }
262 }
263
264 centers_length = centerCount;
265
266 delete[] closestDistSq;
267 }
268
269
287 void GroupWiseCenterChooser(int k, int* dsindices, int indices_length, int* centers, int& centers_length)
288 {
289 const float kSpeedUpFactor = 1.3f;
290
291 int n = indices_length;
292
293 DistanceType* closestDistSq = new DistanceType[n];
294
295 // Choose one random center and set the closestDistSq values
296 int index = rand_int(n);
297 CV_DbgAssert(index >=0 && index < n);
298 centers[0] = dsindices[index];
299
300 for (int i = 0; i < n; i++) {
301 closestDistSq[i] = distance(dataset[dsindices[i]], dataset[dsindices[index]], dataset.cols);
302 }
303
304
305 // Choose each center
306 int centerCount;
307 for (centerCount = 1; centerCount < k; centerCount++) {
308
309 // Repeat several trials
310 double bestNewPot = -1;
311 int bestNewIndex = 0;
312 DistanceType furthest = 0;
313 for (index = 0; index < n; index++) {
314
315 // We will test only the potential of the points further than current candidate
316 if( closestDistSq[index] > kSpeedUpFactor * (float)furthest ) {
317
318 // Compute the new potential
319 double newPot = 0;
320 for (int i = 0; i < n; i++) {
321 newPot += std::min( distance(dataset[dsindices[i]], dataset[dsindices[index]], dataset.cols)
322 , closestDistSq[i] );
323 }
324
325 // Store the best result
326 if ((bestNewPot < 0)||(newPot <= bestNewPot)) {
327 bestNewPot = newPot;
328 bestNewIndex = index;
329 furthest = closestDistSq[index];
330 }
331 }
332 }
333
334 // Add the appropriate center
335 centers[centerCount] = dsindices[bestNewIndex];
336 for (int i = 0; i < n; i++) {
337 closestDistSq[i] = std::min( distance(dataset[dsindices[i]], dataset[dsindices[bestNewIndex]], dataset.cols)
338 , closestDistSq[i] );
339 }
340 }
341
342 centers_length = centerCount;
343
344 delete[] closestDistSq;
345 }
346
347
348 public:
349
350
358 HierarchicalClusteringIndex(const Matrix<ElementType>& inputData, const IndexParams& index_params = HierarchicalClusteringIndexParams(),
359 Distance d = Distance())
360 : dataset(inputData), params(index_params), root(NULL), indices(NULL), distance(d)
361 {
362 memoryCounter = 0;
363
364 size_ = dataset.rows;
365 veclen_ = dataset.cols;
366
367 branching_ = get_param(params,"branching",32);
368 centers_init_ = get_param(params,"centers_init", FLANN_CENTERS_RANDOM);
369 trees_ = get_param(params,"trees",4);
370 leaf_size_ = get_param(params,"leaf_size",100);
371
372 if (centers_init_==FLANN_CENTERS_RANDOM) {
373 chooseCenters = &HierarchicalClusteringIndex::chooseCentersRandom;
374 }
375 else if (centers_init_==FLANN_CENTERS_GONZALES) {
376 chooseCenters = &HierarchicalClusteringIndex::chooseCentersGonzales;
377 }
378 else if (centers_init_==FLANN_CENTERS_KMEANSPP) {
379 chooseCenters = &HierarchicalClusteringIndex::chooseCentersKMeanspp;
380 }
381 else if (centers_init_==FLANN_CENTERS_GROUPWISE) {
382 chooseCenters = &HierarchicalClusteringIndex::GroupWiseCenterChooser;
383 }
384 else {
385 FLANN_THROW(cv::Error::StsError, "Unknown algorithm for choosing initial centers.");
386 }
387
388 root = new NodePtr[trees_];
389 indices = new int*[trees_];
390
391 for (int i=0; i<trees_; ++i) {
392 root[i] = NULL;
393 indices[i] = NULL;
394 }
395 }
396
397 HierarchicalClusteringIndex(const HierarchicalClusteringIndex&);
398 HierarchicalClusteringIndex& operator=(const HierarchicalClusteringIndex&);
399
405 virtual ~HierarchicalClusteringIndex()
406 {
407 if (root!=NULL) {
408 delete[] root;
409 }
410
411 if (indices!=NULL) {
412 free_indices();
413 delete[] indices;
414 }
415 }
416
420 size_t size() const CV_OVERRIDE
421 {
422 return size_;
423 }
424
428 size_t veclen() const CV_OVERRIDE
429 {
430 return veclen_;
431 }
432
433
438 int usedMemory() const CV_OVERRIDE
439 {
440 return pool.usedMemory+pool.wastedMemory+memoryCounter;
441 }
442
446 void buildIndex() CV_OVERRIDE
447 {
448 if (branching_<2) {
449 FLANN_THROW(cv::Error::StsError, "Branching factor must be at least 2");
450 }
451
452 free_indices();
453
454 for (int i=0; i<trees_; ++i) {
455 indices[i] = new int[size_];
456 for (size_t j=0; j<size_; ++j) {
457 indices[i][j] = (int)j;
458 }
459 root[i] = pool.allocate<Node>();
460 computeClustering(root[i], indices[i], (int)size_, branching_,0);
461 }
462 }
463
464
465 flann_algorithm_t getType() const CV_OVERRIDE
466 {
467 return FLANN_INDEX_HIERARCHICAL;
468 }
469
470
471 void saveIndex(FILE* stream) CV_OVERRIDE
472 {
473 save_value(stream, branching_);
474 save_value(stream, trees_);
475 save_value(stream, centers_init_);
476 save_value(stream, leaf_size_);
477 save_value(stream, memoryCounter);
478 for (int i=0; i<trees_; ++i) {
479 save_value(stream, *indices[i], size_);
480 save_tree(stream, root[i], i);
481 }
482
483 }
484
485
486 void loadIndex(FILE* stream) CV_OVERRIDE
487 {
488 if (root!=NULL) {
489 delete[] root;
490 }
491
492 if (indices!=NULL) {
493 free_indices();
494 delete[] indices;
495 }
496
497 load_value(stream, branching_);
498 load_value(stream, trees_);
499 load_value(stream, centers_init_);
500 load_value(stream, leaf_size_);
501 load_value(stream, memoryCounter);
502
503 indices = new int*[trees_];
504 root = new NodePtr[trees_];
505 for (int i=0; i<trees_; ++i) {
506 indices[i] = new int[size_];
507 load_value(stream, *indices[i], size_);
508 load_tree(stream, root[i], i);
509 }
510
511 params["algorithm"] = getType();
512 params["branching"] = branching_;
513 params["trees"] = trees_;
514 params["centers_init"] = centers_init_;
515 params["leaf_size"] = leaf_size_;
516 }
517
518
528 void findNeighbors(ResultSet<DistanceType>& result, const ElementType* vec, const SearchParams& searchParams) CV_OVERRIDE
529 {
530
531 const int maxChecks = get_param(searchParams,"checks",32);
532 const bool explore_all_trees = get_param(searchParams,"explore_all_trees",false);
533
534 // Priority queue storing intermediate branches in the best-bin-first search
535 Heap<BranchSt>* heap = new Heap<BranchSt>((int)size_);
536
537 std::vector<bool> checked(size_,false);
538 int checks = 0;
539 for (int i=0; i<trees_; ++i) {
540 findNN(root[i], result, vec, checks, maxChecks, heap, checked, explore_all_trees);
541 if (!explore_all_trees && (checks >= maxChecks) && result.full())
542 break;
543 }
544
545 BranchSt branch;
546 while (heap->popMin(branch) && (checks<maxChecks || !result.full())) {
547 NodePtr node = branch.node;
548 findNN(node, result, vec, checks, maxChecks, heap, checked, false);
549 }
550
551 delete heap;
552
553 CV_Assert(result.full());
554 }
555
556 IndexParams getParameters() const CV_OVERRIDE
557 {
558 return params;
559 }
560
561
562 private:
563
567 struct Node
568 {
572 int pivot;
576 int size;
580 Node** childs;
584 int* indices;
588 int level;
589 };
590 typedef Node* NodePtr;
591
592
593
597 typedef BranchStruct<NodePtr, DistanceType> BranchSt;
598
599
600
601 void save_tree(FILE* stream, NodePtr node, int num)
602 {
603 save_value(stream, *node);
604 if (node->childs==NULL) {
605 int indices_offset = (int)(node->indices - indices[num]);
606 save_value(stream, indices_offset);
607 }
608 else {
609 for(int i=0; i<branching_; ++i) {
610 save_tree(stream, node->childs[i], num);
611 }
612 }
613 }
614
615
616 void load_tree(FILE* stream, NodePtr& node, int num)
617 {
618 node = pool.allocate<Node>();
619 load_value(stream, *node);
620 if (node->childs==NULL) {
621 int indices_offset;
622 load_value(stream, indices_offset);
623 node->indices = indices[num] + indices_offset;
624 }
625 else {
626 node->childs = pool.allocate<NodePtr>(branching_);
627 for(int i=0; i<branching_; ++i) {
628 load_tree(stream, node->childs[i], num);
629 }
630 }
631 }
632
633
637 void free_indices()
638 {
639 if (indices!=NULL) {
640 for(int i=0; i<trees_; ++i) {
641 if (indices[i]!=NULL) {
642 delete[] indices[i];
643 indices[i] = NULL;
644 }
645 }
646 }
647 }
648
649
650 void computeLabels(int* dsindices, int indices_length, int* centers, int centers_length, int* labels, DistanceType& cost)
651 {
652 cost = 0;
653 for (int i=0; i<indices_length; ++i) {
654 ElementType* point = dataset[dsindices[i]];
655 DistanceType dist = distance(point, dataset[centers[0]], veclen_);
656 labels[i] = 0;
657 for (int j=1; j<centers_length; ++j) {
658 DistanceType new_dist = distance(point, dataset[centers[j]], veclen_);
659 if (dist>new_dist) {
660 labels[i] = j;
661 dist = new_dist;
662 }
663 }
664 cost += dist;
665 }
666 }
667
679 void computeClustering(NodePtr node, int* dsindices, int indices_length, int branching, int level)
680 {
681 node->size = indices_length;
682 node->level = level;
683
684 if (indices_length < leaf_size_) { // leaf node
685 node->indices = dsindices;
686 std::sort(node->indices,node->indices+indices_length);
687 node->childs = NULL;
688 return;
689 }
690
691 std::vector<int> centers(branching);
692 std::vector<int> labels(indices_length);
693
694 int centers_length;
695 (this->*chooseCenters)(branching, dsindices, indices_length, &centers[0], centers_length);
696
697 if (centers_length<branching) {
698 node->indices = dsindices;
699 std::sort(node->indices,node->indices+indices_length);
700 node->childs = NULL;
701 return;
702 }
703
704
705 // assign points to clusters
706 DistanceType cost;
707 computeLabels(dsindices, indices_length, &centers[0], centers_length, &labels[0], cost);
708
709 node->childs = pool.allocate<NodePtr>(branching);
710 int start = 0;
711 int end = start;
712 for (int i=0; i<branching; ++i) {
713 for (int j=0; j<indices_length; ++j) {
714 if (labels[j]==i) {
715 std::swap(dsindices[j],dsindices[end]);
716 std::swap(labels[j],labels[end]);
717 end++;
718 }
719 }
720
721 node->childs[i] = pool.allocate<Node>();
722 node->childs[i]->pivot = centers[i];
723 node->childs[i]->indices = NULL;
724 computeClustering(node->childs[i],dsindices+start, end-start, branching, level+1);
725 start=end;
726 }
727 }
728
729
730
744 void findNN(NodePtr node, ResultSet<DistanceType>& result, const ElementType* vec, int& checks, int maxChecks,
745 Heap<BranchSt>* heap, std::vector<bool>& checked, bool explore_all_trees = false)
746 {
747 if (node->childs==NULL) {
748 if (!explore_all_trees && (checks>=maxChecks) && result.full()) {
749 return;
750 }
751 for (int i=0; i<node->size; ++i) {
752 int index = node->indices[i];
753 if (!checked[index]) {
754 DistanceType dist = distance(dataset[index], vec, veclen_);
755 result.addPoint(dist, index);
756 checked[index] = true;
757 ++checks;
758 }
759 }
760 }
761 else {
762 DistanceType* domain_distances = new DistanceType[branching_];
763 int best_index = 0;
764 domain_distances[best_index] = distance(vec, dataset[node->childs[best_index]->pivot], veclen_);
765 for (int i=1; i<branching_; ++i) {
766 domain_distances[i] = distance(vec, dataset[node->childs[i]->pivot], veclen_);
767 if (domain_distances[i]<domain_distances[best_index]) {
768 best_index = i;
769 }
770 }
771 for (int i=0; i<branching_; ++i) {
772 if (i!=best_index) {
773 heap->insert(BranchSt(node->childs[i],domain_distances[i]));
774 }
775 }
776 delete[] domain_distances;
777 findNN(node->childs[best_index],result,vec, checks, maxChecks, heap, checked, explore_all_trees);
778 }
779 }
780
781 private:
782
783
787 const Matrix<ElementType> dataset;
788
792 IndexParams params;
793
794
798 size_t size_;
799
803 size_t veclen_;
804
808 NodePtr* root;
809
813 int** indices;
814
815
819 Distance distance;
820
828 PooledAllocator pool;
829
833 int memoryCounter;
834
836 int branching_;
837 int trees_;
838 flann_centers_init_t centers_init_;
839 int leaf_size_;
840
841
842};
843
844}
845
847
848 #endif /* OPENCV_FLANN_HIERARCHICAL_CLUSTERING_INDEX_H_ */
CV_EXPORTS_W void sort(InputArray src, OutputArray dst, int flags)
Sorts each row or each column of a matrix.
CV_EXPORTS_W void min(InputArray src1, InputArray src2, OutputArray dst)
Calculates per-element minimum of two arrays or an array and a scalar.
#define CV_Assert(expr)
Checks a condition at runtime and throws exception if it fails
Definition: base.hpp:342
#define CV_DbgAssert(expr)
Definition: base.hpp:375
CV_EXPORTS void swap(Mat &a, Mat &b)
Swaps two matrices