File: cftree.py

package info (click to toggle)
python-pyclustering 0.10.1.2-2
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, forky, sid, trixie
  • size: 11,128 kB
  • sloc: cpp: 38,888; python: 24,311; sh: 384; makefile: 105
file content (1138 lines) | stat: -rwxr-xr-x 39,031 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
"""!

@brief Data Structure: CF-Tree
@details Implementation based on paper @cite article::birch::1.

@authors Andrei Novikov (pyclustering@yandex.ru)
@date 2014-2020
@copyright BSD-3-Clause

"""

import numpy

from copy import copy

from pyclustering.utils import euclidean_distance_square
from pyclustering.utils import manhattan_distance
from pyclustering.utils import linear_sum, square_sum

from enum import IntEnum


class measurement_type(IntEnum):
    """!
    @brief Enumeration of measurement types for CF-Tree.
    
    @see cftree
    
    """
    
    ## Euclidian distance between centroids of clustering features.
    CENTROID_EUCLIDEAN_DISTANCE = 0
    
    ## Manhattan distance between centroids of clustering features.
    CENTROID_MANHATTAN_DISTANCE = 1
    
    ## Average distance between all objects from clustering features.
    AVERAGE_INTER_CLUSTER_DISTANCE = 2
    
    ## Average distance between all objects within clustering features and between them.
    AVERAGE_INTRA_CLUSTER_DISTANCE = 3
    
    ## Variance based distance between clustering features.
    VARIANCE_INCREASE_DISTANCE = 4


class cfnode_type(IntEnum):
    """!
    @brief Enumeration of CF-Node types that are used by CF-Tree.
    
    @see cfnode
    @see cftree
    
    """
    
    ## Undefined node.
    CFNODE_DUMMY = 0
    
    ## Leaf node hasn't got successors, only entries.
    CFNODE_LEAF = 1
    
    ## Non-leaf node has got successors and hasn't got entries.
    CFNODE_NONLEAF = 2


class cfentry:
    """!
    @brief Clustering feature representation.
    
    @see cfnode
    @see cftree
    
    """

    @property
    def number_points(self):
        """!
        @brief Returns number of points that are encoded.
        
        @return (uint) Number of encoded points.
        
        """
        return self.__number_points

    @property
    def linear_sum(self):
        """!
        @brief Returns linear sum.
        
        @return (list) Linear sum.
        
        """
        
        return self.__linear_sum

    @property
    def square_sum(self):
        """!
        @brief Returns square sum.
        
        @return (double) Square sum.
        
        """
        return self.__square_sum


    def __init__(self, number_points, linear_sum, square_sum):
        """!
        @brief CF-entry constructor.
        
        @param[in] number_points (uint): Number of objects that is represented by the entry.
        @param[in] linear_sum (list): Linear sum of values that represent objects in each dimension.
        @param[in] square_sum (double): Square sum of values that represent objects.
        
        """
        
        self.__number_points = number_points
        self.__linear_sum = numpy.array(linear_sum)
        self.__square_sum = square_sum
        
        self.__centroid = None
        self.__radius = None
        self.__diameter = None


    def __copy__(self):
        """!
        @returns (cfentry) Makes copy of the CF-entry instance.
        
        """
        return cfentry(self.__number_points, self.__linear_sum, self.__square_sum)


    def __repr__(self):
        """!
        @return (string) Returns CF-entry representation.
        
        """
        return "CF-Entry (N: '%s', LS: '%s', D: '%s')" % \
               (self.number_points, self.linear_sum, str(self.get_diameter()))


    def __str__(self):
        """!
        @brief Default cfentry string representation.
        
        """
        return self.__repr__()


    def __add__(self, entry):
        """!
        @brief Overloaded operator add. Performs addition of two clustering features.
        
        @param[in] entry (cfentry): Entry that is added to the current.
        
        @return (cfentry) Result of addition of two clustering features.
        
        """
        
        number_points = self.number_points + entry.number_points
        result_linear_sum = numpy.add(self.linear_sum, entry.linear_sum)
        result_square_sum = self.square_sum + entry.square_sum
        
        return cfentry(number_points, result_linear_sum, result_square_sum)


    def __eq__(self, entry):
        """!
        @brief Overloaded operator eq. 
        @details Performs comparison of two clustering features.
        
        @param[in] entry (cfentry): Entry that is used for comparison with current.
        
        @return (bool) True is both clustering features are equals in line with tolerance, otherwise False.
        
        """
                
        tolerance = 0.00001
        
        result = (self.__number_points == entry.number_points)
        result &= ((self.square_sum + tolerance > entry.square_sum) and (self.square_sum - tolerance < entry.square_sum))
        
        for index_dimension in range(0, len(self.linear_sum)):
            result &= ((self.linear_sum[index_dimension] + tolerance > entry.linear_sum[index_dimension]) and (self.linear_sum[index_dimension] - tolerance < entry.linear_sum[index_dimension]))
        
        return result


    def get_distance(self, entry, type_measurement):
        """!
        @brief Calculates distance between two clusters in line with measurement type.
        
        @details In case of usage CENTROID_EUCLIDIAN_DISTANCE square euclidian distance will be returned.
                 Square root should be taken from the result for obtaining real euclidian distance between
                 entries. 
        
        @param[in] entry (cfentry): Clustering feature to which distance should be obtained.
        @param[in] type_measurement (measurement_type): Distance measurement algorithm between two clusters.
        
        @return (double) Distance between two clusters.
        
        """
        
        if type_measurement is measurement_type.CENTROID_EUCLIDEAN_DISTANCE:
            return euclidean_distance_square(entry.get_centroid(), self.get_centroid())
        
        elif type_measurement is measurement_type.CENTROID_MANHATTAN_DISTANCE:
            return manhattan_distance(entry.get_centroid(), self.get_centroid())
        
        elif type_measurement is measurement_type.AVERAGE_INTER_CLUSTER_DISTANCE:
            return self.__get_average_inter_cluster_distance(entry)
            
        elif type_measurement is measurement_type.AVERAGE_INTRA_CLUSTER_DISTANCE:
            return self.__get_average_intra_cluster_distance(entry)
        
        elif type_measurement is measurement_type.VARIANCE_INCREASE_DISTANCE:
            return self.__get_variance_increase_distance(entry)

        else:
            raise ValueError("Unsupported type of measurement '%s' is specified." % type_measurement)


    def get_centroid(self):
        """!
        @brief Calculates centroid of cluster that is represented by the entry. 
        @details It's calculated once when it's requested after the last changes.
        
        @return (array_like) Centroid of cluster that is represented by the entry.
        
        """
        
        if self.__centroid is not None:
            return self.__centroid

        self.__centroid = numpy.divide(self.linear_sum, self.number_points)
        return self.__centroid
    
    
    def get_radius(self):
        """!
        @brief Calculates radius of cluster that is represented by the entry.
        @details It's calculated once when it's requested after the last changes.
        
        @return (double) Radius of cluster that is represented by the entry.
        
        """
        
        if self.__radius is not None:
            return self.__radius

        N = self.number_points
        centroid = self.get_centroid()
        
        radius_part_1 = self.square_sum
        radius_part_2 = 2.0 * numpy.dot(self.linear_sum, centroid)
        radius_part_3 = N * numpy.dot(centroid, centroid)
        
        self.__radius = ((1.0 / N) * (radius_part_1 - radius_part_2 + radius_part_3)) ** 0.5
        return self.__radius


    def get_diameter(self):
        """!
        @brief Calculates diameter of cluster that is represented by the entry.
        @details It's calculated once when it's requested after the last changes.
        
        @return (double) Diameter of cluster that is represented by the entry.
        
        """
        
        if self.__diameter is not None:
            return self.__diameter

        diameter_part = self.square_sum * self.number_points - 2.0 * numpy.dot(self.linear_sum, self.linear_sum) + self.square_sum * self.number_points

        if diameter_part < 0.000000001:
            self.__diameter = 0.0
        else:
            self.__diameter = (diameter_part / (self.number_points * (self.number_points - 1))) ** 0.5

        return self.__diameter


    def __get_average_inter_cluster_distance(self, entry):
        """!
        @brief Calculates average inter cluster distance between current and specified clusters.
        
        @param[in] entry (cfentry): Clustering feature to which distance should be obtained.
        
        @return (double) Average inter cluster distance.
        
        """
        
        linear_part_distance = numpy.dot(self.linear_sum, entry.linear_sum)

        return ((entry.number_points * self.square_sum - 2.0 * linear_part_distance + self.number_points * entry.square_sum) / (self.number_points * entry.number_points)) ** 0.5


    def __get_average_intra_cluster_distance(self, entry):
        """!
        @brief Calculates average intra cluster distance between current and specified clusters.
        
        @param[in] entry (cfentry): Clustering feature to which distance should be obtained.
        
        @return (double) Average intra cluster distance.
        
        """
        
        linear_part_first = numpy.add(self.linear_sum, entry.linear_sum)
        linear_part_second = linear_part_first
        
        linear_part_distance = numpy.dot(linear_part_first, linear_part_second)
        
        general_part_distance = 2.0 * (self.number_points + entry.number_points) * (self.square_sum + entry.square_sum) - 2.0 * linear_part_distance
        
        return (general_part_distance / ((self.number_points + entry.number_points) * (self.number_points + entry.number_points - 1.0))) ** 0.5
    
    
    def __get_variance_increase_distance(self, entry):
        """!
        @brief Calculates variance increase distance between current and specified clusters.
        
        @param[in] entry (cfentry): Clustering feature to which distance should be obtained.
        
        @return (double) Variance increase distance.
        
        """
                
        linear_part_12 = numpy.add(self.linear_sum, entry.linear_sum)
        variance_part_first = (self.square_sum + entry.square_sum) - \
            2.0 * numpy.dot(linear_part_12, linear_part_12) / (self.number_points + entry.number_points) + \
            (self.number_points + entry.number_points) * numpy.dot(linear_part_12, linear_part_12) / (self.number_points + entry.number_points)**2.0
        
        linear_part_11 = numpy.dot(self.linear_sum, self.linear_sum)
        variance_part_second = -(self.square_sum - (2.0 * linear_part_11 / self.number_points) + (linear_part_11 / self.number_points))
        
        linear_part_22 = numpy.dot(entry.linear_sum, entry.linear_sum)
        variance_part_third = -(entry.square_sum - (2.0 / entry.number_points) * linear_part_22 + entry.number_points * (1.0 / entry.number_points ** 2.0) * linear_part_22)

        return variance_part_first + variance_part_second + variance_part_third
        

class cfnode:
    """!
    @brief Representation of node of CF-Tree.
    
    """
    
    def __init__(self, feature, parent):
        """!
        @brief Constructor of abstract CF node.
        
        @param[in] feature (cfentry): Clustering feature of the created node.
        @param[in] parent (cfnode): Parent of the created node.
        
        """
        
        ## Clustering feature of the node.
        self.feature = copy(feature)

        ## Pointer to the parent node (None for root).
        self.parent = parent
        
        ## Type node (leaf or non-leaf).
        self.type = cfnode_type.CFNODE_DUMMY
    
    
    def __repr__(self):
        """!
        @return (string) Default representation of CF node.
        
        """
        
        return 'CF node %s, parent %s, feature %s' % (hex(id(self)), self.parent, self.feature)


    def __str__(self):
        """!
        @return (string) String representation of CF node.
        
        """
        return self.__repr__()
    
    
    def get_distance(self, node, type_measurement):
        """!
        @brief Calculates distance between nodes in line with specified type measurement.
        
        @param[in] node (cfnode): CF-node that is used for calculation distance to the current node.
        @param[in] type_measurement (measurement_type): Measurement type that is used for calculation distance.
        
        @return (double) Distance between two nodes.
        
        """
        
        return self.feature.get_distance(node.feature, type_measurement)
    

class non_leaf_node(cfnode):
    """!
    @brief Representation of clustering feature non-leaf node.
    
    """ 
    
    @property
    def successors(self):
        """!
        @return (list) List of successors of the node.
        
        """
        return self.__successors
    
    
    def __init__(self, feature, parent, successors):
        """!
        @brief Create CF Non-leaf node.
        
        @param[in] feature (cfentry): Clustering feature of the created node.
        @param[in] parent (non_leaf_node): Parent of the created node.
        @param[in] successors (list): List of successors of the node.
        
        """
                
        super().__init__(feature, parent)
        
        ## Node type in CF tree that is CFNODE_NONLEAF for non leaf node.
        self.type = cfnode_type.CFNODE_NONLEAF
        
        self.__successors = successors
    
    
    def __repr__(self):
        """!
        @return (string) Representation of non-leaf node representation.
        
        """   
        return 'Non-leaf node %s, parent %s, feature %s, successors: %d' % (hex(id(self)), self.parent, self.feature, len(self.successors))
    
    
    def __str__(self):
        """!
        @return (string) String non-leaf representation.
        
        """
        return self.__repr__()
    
    
    def insert_successor(self, successor):
        """!
        @brief Insert successor to the node.
        
        @param[in] successor (cfnode): Successor for adding.
        
        """
        
        self.feature += successor.feature
        self.successors.append(successor)
        
        successor.parent = self


    def merge(self, node):
        """!
        @brief Merge non-leaf node to the current.
        
        @param[in] node (non_leaf_node): Non-leaf node that should be merged with current.
        
        """
                
        self.feature += node.feature
        
        for child in node.successors:
            child.parent = self
            self.successors.append(child)
    
    
    def get_farthest_successors(self, type_measurement):
        """!
        @brief Find pair of farthest successors of the node in line with measurement type.
        
        @param[in] type_measurement (measurement_type): Measurement type that is used for obtaining farthest successors.
        
        @return (list) Pair of farthest successors represented by list [cfnode1, cfnode2].
        
        """
        
        farthest_node1 = None
        farthest_node2 = None
        farthest_distance = 0.0
        
        for i in range(0, len(self.successors)):
            candidate1 = self.successors[i]
            
            for j in range(i + 1, len(self.successors)):
                candidate2 = self.successors[j]
                candidate_distance = candidate1.get_distance(candidate2, type_measurement)
                
                if candidate_distance > farthest_distance:
                    farthest_distance = candidate_distance
                    farthest_node1 = candidate1
                    farthest_node2 = candidate2
        
        return [farthest_node1, farthest_node2]
    
    
    def get_nearest_successors(self, type_measurement):
        """!
        @brief Find pair of nearest successors of the node in line with measurement type.
        
        @param[in] type_measurement (measurement_type): Measurement type that is used for obtaining nearest successors.
        
        @return (list) Pair of nearest successors represented by list.
        
        """
                
        nearest_node1 = None
        nearest_node2 = None
        nearest_distance = float("Inf")
        
        for i in range(0, len(self.successors)):
            candidate1 = self.successors[i]
            
            for j in range(i + 1, len(self.successors)):
                candidate2 = self.successors[j]
                candidate_distance = candidate1.get_distance(candidate2, type_measurement)
                
                if candidate_distance < nearest_distance:
                    nearest_distance = candidate_distance
                    nearest_node1 = candidate1
                    nearest_node2 = candidate2
        
                return [nearest_node1, nearest_node2]


class leaf_node(cfnode):
    """!
    @brief Represents clustering feature leaf node.
    
    """
    
    @property
    def entries(self):
        """!
        @return (list) List of leaf nodes.
        
        """
        return self.__entries
    
    
    def __init__(self, feature, parent, entries):
        """!
        @brief Create CF Leaf node.
        
        @param[in] feature (cfentry): Clustering feature of the created node.
        @param[in] parent (non_leaf_node): Parent of the created node.
        @param[in] entries (list): List of entries of the node.
        
        """
        
        super().__init__(feature, parent)
        
        ## Node type in CF tree that is CFNODE_LEAF for leaf node.
        self.type = cfnode_type.CFNODE_LEAF
        
        self.__entries = entries   # list of clustering features
        
    
    def __repr__(self):
        """!
        @return (string) Default leaf node represenation.
        
        """
        text_entries = "\n"
        for entry in self.entries:
            text_entries += "\t" + str(entry) + "\n"
        
        return "Leaf-node: '%s', parent: '%s', feature: '%s', entries: '%d'" % \
               (str(hex(id(self))), self.parent, self.feature, len(self.entries))
    
    
    def __str__(self):
        """!
        @return (string) String leaf node representation.
        
        """
        return self.__repr__()
    
    
    def insert_entry(self, entry):
        """!
        @brief Insert new clustering feature to the leaf node.
        
        @param[in] entry (cfentry): Clustering feature.
        
        """

        self.feature += entry
        self.entries.append(entry)


    def merge(self, node):
        """!
        @brief Merge leaf node to the current.
        
        @param[in] node (leaf_node): Leaf node that should be merged with current.
        
        """
        
        self.feature += node.feature
        
        # Move entries from merged node
        for entry in node.entries:
            self.entries.append(entry)


    def get_farthest_entries(self, type_measurement):
        """!
        @brief Find pair of farthest entries of the node.
        
        @param[in] type_measurement (measurement_type): Measurement type that is used for obtaining farthest entries.
        
        @return (list) Pair of farthest entries of the node that are represented by list.
        
        """
        
        farthest_entity1 = None
        farthest_entity2 = None
        farthest_distance = 0
        
        for i in range(0, len(self.entries)):
            candidate1 = self.entries[i]
            
            for j in range(i + 1, len(self.entries)):
                candidate2 = self.entries[j]
                candidate_distance = candidate1.get_distance(candidate2, type_measurement)
                
                if candidate_distance > farthest_distance:
                    farthest_distance = candidate_distance
                    farthest_entity1 = candidate1
                    farthest_entity2 = candidate2
        
        return [farthest_entity1, farthest_entity2]


    def get_nearest_index_entry(self, entry, type_measurement):
        """!
        @brief Find nearest index of nearest entry of node for the specified entry.
        
        @param[in] entry (cfentry): Entry that is used for calculation distance.
        @param[in] type_measurement (measurement_type): Measurement type that is used for obtaining nearest entry to the specified.
        
        @return (uint) Index of nearest entry of node for the specified entry.
        
        """
        
        minimum_distance = float('Inf')
        nearest_index = -1
        
        for candidate_index in range(0, len(self.entries)):
            candidate_distance = self.entries[candidate_index].get_distance(entry, type_measurement)
            if candidate_distance < minimum_distance:
                minimum_distance = candidate_distance
                nearest_index = candidate_index
        
        return nearest_index


    def get_nearest_entry(self, entry, type_measurement):
        """!
        @brief Find nearest entry of node for the specified entry.
        
        @param[in] entry (cfentry): Entry that is used for calculation distance.
        @param[in] type_measurement (measurement_type): Measurement type that is used for obtaining nearest entry to the specified.
        
        @return (cfentry) Nearest entry of node for the specified entry.
        
        """
        
        min_key = lambda cur_entity: cur_entity.get_distance(entry, type_measurement)
        return min(self.entries, key=min_key)


class cftree:
    """!
    @brief CF-Tree representation.
    @details A CF-tree is a height-balanced tree with two parameters: branching factor and threshold.
    
    """

    @property
    def root(self):
        """!
        @return (cfnode) Root of the tree.
        
        """
        return self.__root


    @property
    def leafes(self):
        """!
        @return (list) List of all leaf nodes in the tree.
        
        """
        return self.__leafes


    @property
    def amount_nodes(self):
        """!
        @return (unit) Number of nodes (leaf and non-leaf) in the tree.
        
        """
        return self.__amount_nodes


    @property
    def amount_entries(self):
        """!
        @return (uint) Number of entries in the tree.
        
        """
        return self.__amount_entries


    @property
    def height(self):
        """!
        @return (uint) Height of the tree.
        
        """
        return self.__height


    @property
    def branch_factor(self):
        """!
        @return (uint) Branching factor of the tree.
        @details Branching factor defines maximum number of successors in each non-leaf node.
        
        """
        return self.__branch_factor


    @property
    def threshold(self):
        """!
        @return (double) Threshold of the tree that represents maximum diameter of sub-clusters that is formed by leaf node entries.
        
        """
        return self.__threshold


    @property
    def max_entries(self):
        """!
        @return (uint) Maximum number of entries in each leaf node.
        
        """
        return self.__max_entries


    @property
    def type_measurement(self):
        """!
        @return (measurement_type) Type that is used for measuring.
        
        """
        return self.__type_measurement


    def __init__(self, branch_factor, max_entries, threshold, type_measurement = measurement_type.CENTROID_EUCLIDEAN_DISTANCE):
        """!
        @brief Create CF-tree.
        
        @param[in] branch_factor (uint): Maximum number of children for non-leaf nodes.
        @param[in] max_entries (uint): Maximum number of entries for leaf nodes.
        @param[in] threshold (double): Maximum diameter of feature clustering for each leaf node.
        @param[in] type_measurement (measurement_type): Measurement type that is used for calculation distance metrics.
        
        """

        self.__root = None

        self.__branch_factor = branch_factor  # maximum number of children
        if self.__branch_factor < 2:
            self.__branch_factor = 2
        
        self.__threshold = threshold  # maximum diameter of sub-clusters stored at the leaf nodes
        self.__max_entries = max_entries
        
        self.__leafes = []
        
        self.__type_measurement = type_measurement
        
        # statistics
        self.__amount_nodes = 0    # root, despite it can be None.
        self.__amount_entries = 0
        self.__height = 0          # tree size with root.


    def get_level_nodes(self, level):
        """!
        @brief Traverses CF-tree to obtain nodes at the specified level.
        
        @param[in] level (uint): CF-tree level from that nodes should be returned.
        
        @return (list) List of CF-nodes that are located on the specified level of the CF-tree.
        
        """
        
        level_nodes = []
        if level < self.__height:
            level_nodes = self.__recursive_get_level_nodes(level, self.__root)
        
        return level_nodes


    def __recursive_get_level_nodes(self, level, node):
        """!
        @brief Traverses CF-tree to obtain nodes at the specified level recursively.
        
        @param[in] level (uint): Current CF-tree level.
        @param[in] node (cfnode): CF-node from that traversing is performed.
        
        @return (list) List of CF-nodes that are located on the specified level of the CF-tree.
        
        """
        
        level_nodes = []
        if level == 0:
            level_nodes.append(node)
        
        else:
            for sucessor in node.successors:
                level_nodes += self.__recursive_get_level_nodes(level - 1, sucessor)
        
        return level_nodes


    def insert_point(self, point):
        """!
        @brief Insert point that is represented by list of coordinates.

        @param[in] point (list): Point represented by list of coordinates that should be inserted to CF tree.

        """

        entry = cfentry(len([point]), linear_sum([point]), square_sum([point]))
        self.insert(entry)
    
    
    def insert(self, entry):
        """!
        @brief Insert clustering feature to the tree.
        
        @param[in] entry (cfentry): Clustering feature that should be inserted.
        
        """
                
        if self.__root is None:
            node = leaf_node(entry, None, [entry])
            
            self.__root = node
            self.__leafes.append(node)
            
            # Update statistics
            self.__amount_entries += 1
            self.__amount_nodes += 1
            self.__height += 1             # root has successor now
        else:
            child_node_updation = self.__recursive_insert(entry, self.__root)
            if child_node_updation is True:
                # Splitting has been finished, check for possibility to merge (at least we have already two children).
                if self.__merge_nearest_successors(self.__root) is True:
                    self.__amount_nodes -= 1


    def find_nearest_leaf(self, entry, search_node = None):
        """!
        @brief Search nearest leaf to the specified clustering feature.
        
        @param[in] entry (cfentry): Clustering feature.
        @param[in] search_node (cfnode): Node from that searching should be started, if None then search process will be started for the root.
        
        @return (leaf_node) Nearest node to the specified clustering feature.
        
        """
        
        if search_node is None:
            search_node = self.__root
        
        nearest_node = search_node
        
        if search_node.type == cfnode_type.CFNODE_NONLEAF:
            min_key = lambda child_node: child_node.feature.get_distance(entry, self.__type_measurement)
            nearest_child_node = min(search_node.successors, key = min_key)
            
            nearest_node = self.find_nearest_leaf(entry, nearest_child_node)
        
        return nearest_node


    def __recursive_insert(self, entry, search_node):
        """!
        @brief Recursive insert of the entry to the tree.
        @details It performs all required procedures during insertion such as splitting, merging.
        
        @param[in] entry (cfentry): Clustering feature.
        @param[in] search_node (cfnode): Node from that insertion should be started.
        
        @return (bool) True if number of nodes at the below level is changed, otherwise False.
        
        """
        
        # None-leaf node
        if search_node.type == cfnode_type.CFNODE_NONLEAF:
            return self.__insert_for_noneleaf_node(entry, search_node)
        
        # Leaf is reached 
        else:
            return self.__insert_for_leaf_node(entry, search_node)


    def __insert_for_leaf_node(self, entry, search_node):
        """!
        @brief Recursive insert entry from leaf node to the tree.
        
        @param[in] entry (cfentry): Clustering feature.
        @param[in] search_node (cfnode): None-leaf node from that insertion should be started.
        
        @return (bool) True if number of nodes at the below level is changed, otherwise False.
        
        """
        
        node_amount_updation = False
        
        # Try to absorb by the entity
        index_nearest_entry = search_node.get_nearest_index_entry(entry, self.__type_measurement)
        nearest_entry = search_node.entries[index_nearest_entry]    # get nearest entry
        merged_entry = nearest_entry + entry
        
        # Otherwise try to add new entry
        if merged_entry.get_diameter() > self.__threshold:
            # If it's not exceeded append entity and update feature of the leaf node.
            search_node.insert_entry(entry)
            
            # Otherwise current node should be splitted
            if len(search_node.entries) > self.__max_entries:
                self.__split_procedure(search_node)
                node_amount_updation = True
            
            # Update statistics
            self.__amount_entries += 1
            
        else:
            search_node.entries[index_nearest_entry] = merged_entry
            search_node.feature += entry
        
        return node_amount_updation


    def __insert_for_noneleaf_node(self, entry, search_node):
        """!
        @brief Recursive insert entry from none-leaf node to the tree.
        
        @param[in] entry (cfentry): Clustering feature.
        @param[in] search_node (cfnode): None-leaf node from that insertion should be started.
        
        @return (bool) True if number of nodes at the below level is changed, otherwise False.
        
        """
        
        node_amount_updation = False
        
        min_key = lambda child_node: child_node.get_distance(search_node, self.__type_measurement)
        nearest_child_node = min(search_node.successors, key=min_key)
        
        child_node_updation = self.__recursive_insert(entry, nearest_child_node)
        
        # Update clustering feature of none-leaf node.
        search_node.feature += entry
            
        # Check branch factor, probably some leaf has been splitted and threshold has been exceeded.
        if (len(search_node.successors) > self.__branch_factor):
            
            # Check if it's aleady root then new root should be created (height is increased in this case).
            if search_node is self.__root:
                self.__root = non_leaf_node(search_node.feature, None, [search_node])
                search_node.parent = self.__root
                
                # Update statistics
                self.__amount_nodes += 1
                self.__height += 1
                
            [new_node1, new_node2] = self.__split_nonleaf_node(search_node)
            
            # Update parent list of successors
            parent = search_node.parent
            parent.successors.remove(search_node)
            parent.successors.append(new_node1)
            parent.successors.append(new_node2)
            
            # Update statistics
            self.__amount_nodes += 1
            node_amount_updation = True
            
        elif child_node_updation is True:
            # Splitting has been finished, check for possibility to merge (at least we have already two children).
            if self.__merge_nearest_successors(search_node) is True:
                self.__amount_nodes -= 1
        
        return node_amount_updation


    def __merge_nearest_successors(self, node):
        """!
        @brief Find nearest sucessors and merge them.
        
        @param[in] node (non_leaf_node): Node whose two nearest successors should be merged.
        
        @return (bool): True if merging has been successfully performed, otherwise False.
        
        """
        
        merging_result = False
        
        if node.successors[0].type == cfnode_type.CFNODE_NONLEAF:
            [nearest_child_node1, nearest_child_node2] = node.get_nearest_successors(self.__type_measurement)
            
            if len(nearest_child_node1.successors) + len(nearest_child_node2.successors) <= self.__branch_factor:
                node.successors.remove(nearest_child_node2)
                if nearest_child_node2.type == cfnode_type.CFNODE_LEAF:
                    self.__leafes.remove(nearest_child_node2)
                
                nearest_child_node1.merge(nearest_child_node2)
                
                merging_result = True
        
        return merging_result


    def __split_procedure(self, split_node):
        """!
        @brief Starts node splitting procedure in the CF-tree from the specify node.
        
        @param[in] split_node (cfnode): CF-tree node that should be splitted.
        
        """
        if split_node is self.__root:
            self.__root = non_leaf_node(split_node.feature, None, [ split_node ])
            split_node.parent = self.__root
            
            # Update statistics
            self.__amount_nodes += 1
            self.__height += 1
        
        [new_node1, new_node2] = self.__split_leaf_node(split_node)
        
        self.__leafes.remove(split_node)
        self.__leafes.append(new_node1)
        self.__leafes.append(new_node2)
        
        # Update parent list of successors
        parent = split_node.parent
        parent.successors.remove(split_node)
        parent.successors.append(new_node1)
        parent.successors.append(new_node2)
        
        # Update statistics
        self.__amount_nodes += 1


    def __split_nonleaf_node(self, node):
        """!
        @brief Performs splitting of the specified non-leaf node.
        
        @param[in] node (non_leaf_node): Non-leaf node that should be splitted.
        
        @return (list) New pair of non-leaf nodes [non_leaf_node1, non_leaf_node2].
        
        """
        
        [farthest_node1, farthest_node2] = node.get_farthest_successors(self.__type_measurement)
        
        # create new non-leaf nodes
        new_node1 = non_leaf_node(farthest_node1.feature, node.parent, [farthest_node1])
        new_node2 = non_leaf_node(farthest_node2.feature, node.parent, [farthest_node2])
        
        farthest_node1.parent = new_node1
        farthest_node2.parent = new_node2
        
        # re-insert other successors
        for successor in node.successors:
            if (successor is not farthest_node1) and (successor is not farthest_node2):
                distance1 = new_node1.get_distance(successor, self.__type_measurement)
                distance2 = new_node2.get_distance(successor, self.__type_measurement)
                
                if distance1 < distance2:
                    new_node1.insert_successor(successor)
                else:
                    new_node2.insert_successor(successor)
        
        return [new_node1, new_node2]


    def __split_leaf_node(self, node):
        """!
        @brief Performs splitting of the specified leaf node.
        
        @param[in] node (leaf_node): Leaf node that should be splitted.
        
        @return (list) New pair of leaf nodes [leaf_node1, leaf_node2].
        
        @warning Splitted node is transformed to non_leaf.
        
        """
        
        # search farthest pair of entries
        [farthest_entity1, farthest_entity2] = node.get_farthest_entries(self.__type_measurement)
                    
        # create new nodes
        new_node1 = leaf_node(farthest_entity1, node.parent, [farthest_entity1])
        new_node2 = leaf_node(farthest_entity2, node.parent, [farthest_entity2])
        
        # re-insert other entries
        for entity in node.entries:
            if (entity is not farthest_entity1) and (entity is not farthest_entity2):
                distance1 = new_node1.feature.get_distance(entity, self.__type_measurement)
                distance2 = new_node2.feature.get_distance(entity, self.__type_measurement)
                
                if distance1 < distance2:
                    new_node1.insert_entry(entity)
                else:
                    new_node2.insert_entry(entity)
        
        return [new_node1, new_node2]