File: cure.hpp

package info (click to toggle)
python-pyclustering 0.10.1.2-2
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, forky, sid, trixie
  • size: 11,128 kB
  • sloc: cpp: 38,888; python: 24,311; sh: 384; makefile: 105
file content (443 lines) | stat: -rwxr-xr-x 11,047 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
/*!

@authors Andrei Novikov (pyclustering@yandex.ru)
@date 2014-2020
@copyright BSD-3-Clause

*/

#pragma once


#include <algorithm>
#include <list>
#include <set>
#include <vector>

#include <pyclustering/container/kdtree.hpp>

#include <pyclustering/cluster/cure_data.hpp>


using namespace pyclustering::container;


namespace pyclustering {

namespace clst {


/*!

@class   cure_cluster cure.hpp pyclustering/cluster/cure.hpp

@brief   CURE cluster description.

*/
struct cure_cluster {
public:
    std::vector<double> * mean;                         /**< Center of the cluster that is defined by mean value among cluster points. */
    std::vector< std::vector<double> * > * points;      /**< Points that are contained by the cluster. */
    std::vector< std::vector<double> * > * rep;         /**< Representative points of the cluster. */

    cure_cluster *          closest;                    /**< Pointer to the closest cluster. */
    double                  distance_closest;           /**< Distance to the closest cluster. */

public:
    /*!
    
    @brief   Default constructor of CURE cluster.
    
    */
    cure_cluster();

    /*!
    
    @brief   Construct CURE cluster using signle point.
    
    */
    explicit cure_cluster(std::vector<double> * point);

    /*!
    
    @brief   Default destructor.
    
    */
    ~cure_cluster();

    /*!
    
    @brief   Copy constructor is deleted due to safety reasons.
    
    */
    cure_cluster(const cure_cluster & p_other) = delete;

public:
    /*!
    
    @brief   Insert points to cluster.
    
    */
    void insert_points(std::vector<std::vector<double> *> * append_points);

public:
    /*!

    @brief   Assignment operator is deleted due to safety reasons.

    */
    cure_cluster & operator=(const cure_cluster & p_other) = delete;

    /*!

    @brief   Write object operator to represent CURE cluster by text.

    */
    friend std::ostream & operator<<(std::ostream & p_stream, cure_cluster & p_cluster);
};



/*!

@class   cure_cluster_comparator cure.hpp pyclustering/cluster/cure.hpp

@brief   CURE cluster comparator using closest distance.

*/
class cure_cluster_comparator {
public:
    /*!
    
    @brief   Defines compare procedure using closest distance without checking for nullptr.
    @details Arguments are not checked for nullptr, this checking should be done by client code.
    
    @param[in] obj1: pointer to left operand (CURE cluster 1).
    @param[in] obj2: pointer to right operand (CURE cluster 2).
    
    @return  Returns `true` if left operand is less than right.
    
    */
    bool operator()(const cure_cluster * const obj1, const cure_cluster * const obj2) const;
};



/*!

@class   cure_queue cure.hpp pyclustering/cluster/cure.hpp

@brief   Cure sorted queue of cure clusters. Sorting is defined by distances between clusters.
          First element is always occupied by cluster whose distance to the neighbor cluster
          is the smallest in the queue.

*/
class cure_queue {
private:
    std::multiset<cure_cluster *, cure_cluster_comparator> * queue;
    kdtree * tree;

private:
    /*!
    
    @brief   Creates sorted queue of points for specified data.
    
    @param[in] data: pointer to points.
    
    */
    void create_queue(const dataset * data);

    /*!
    
    @brief   Remove representative points of specified cluster from KD Tree.
    
    @param[in] cluster: pointer to points.
    
    */
    void remove_representative_points(cure_cluster * cluster);

    /*!
    
    @brief   Insert representative points of specified cluster to KD tree.
    
    @param[in] cluster: pointer to points.
    
    */
    void insert_representative_points(cure_cluster * cluster);

    /*!
    
    @brief   Insert cluster to sorted queue (it's not insertion of new object).
    @details Used by sorting procedures.
    
    @param[in] inserted_cluster: pointer to points.
    
    */
    void insert_cluster(cure_cluster * inserted_cluster);

    /*!
    
    @brief   Remove cluster from sorted queue (it's not removing of new object). Used by sorting
              procedures.
    
    @param[in] removed_cluster: pointer to points.
    
    */
    void remove_cluster(cure_cluster * removed_cluster);

    /*!
    
    @brief   Calculate distance between clusters.
    
    @param[in] cluster1: pointer to cure cluster 1.
    @param[in] cluster2: pointer to cure cluster 2.
    
    @return  Return distance between clusters.
    
    */
    static double get_distance(cure_cluster * cluster1, cure_cluster * cluster2);

    /*!
    
    @brief   Checks if all elements of a merged cluster are same.
    
    @param[in] merged_cluster: pointer to cure merged_cluster.
    
    @return  Returns true if all the elements in the cluster were found to be same.
    
    */
    static bool are_all_elements_same(cure_cluster * merged_cluster);

public:
    /*!
    
    @brief  Iterator to iterate though CURE clusters in the sorted queue.
    
    */
    using iterator        = std::multiset<cure_cluster *, cure_cluster_comparator>::iterator;

    /*!

    @brief  Constant iterator to iterate though CURE clusters in the sorted queue.

    */
    using const_iterator  = std::multiset<cure_cluster *, cure_cluster_comparator>::const_iterator;

    /*!
    
    @brief   Default constructor of cure queue (always keeps sorted state).
    
    */
    cure_queue();

    /*!
    
    @brief   Default constructor of sorted queue of cure clusters.
    
    @param[in] data: pointer to points.
    
    */
    explicit cure_queue(const std::vector< std::vector<double> > * data);

    /*!
    
    @brief   Default copy constructor of sorted queue of cure clusters is forbidden.
    
    @param[in] p_other: other cure queue to copy.
    
    */
    cure_queue(const cure_queue & p_other) = delete;

    /*!
    
    @brief   Default destructor.
    
    */
    ~cure_queue();

    /*!
    
    @brief   Merge cure clusters in line with the rule of merging of cure algorithm.
    
    @param[in,out] cluster1: pointer to cure cluster 1.
    @param[in,out] cluster2: pointer to cure cluster 2.
    @param[in] number_repr_points: number of representative points for merged cluster.
    @param[in] compression: level of compression for calculation representative points.
    
    */
    void merge(cure_cluster * cluster1, cure_cluster * cluster2, const size_t number_repr_points, const double compression);

    /*!
    
    @brief   Returns iterator to the first CURE cluster.
    
    */
    inline iterator begin() { return queue->begin(); }

    /*!
    
    @brief   Returns constant iterator to the first CURE cluster.
    
    */
    inline const_iterator begin() const { return queue->begin(); };

    /*!
    
    @brief   Returns iterator to the end of CURE cluster collection (not a last element).
    
    */
    inline iterator end() { return queue->end(); }

    /*!
    
    @brief   Returns constant iterator to the end of CURE cluster collection (not a last element).
    
    */
    inline const_iterator end() const { return queue->end(); }

    /*!
    
    @brief   Returns amount of CURE clusters in the queue.
    
    */
    inline std::size_t size() const { return queue->size(); }

public:
    /*!
    
    @brief   Assignment operator is forbidden.
    
    @param[in] p_other: other cure queue to copy.
    
    */
    cure_queue & operator=(const cure_queue & p_other) = delete;
};



/*!

@class  relocation_info cure.hpp pyclustering/cluster/cure.hpp

@brief  Defines relocation request for specific cluster in CURE queue.

*/
class relocation_info {
private:
    cure_queue::iterator m_cluster_iterator;
    cure_cluster * m_closest_cluster;
    double m_closest_distance;

public:
    /*!
    
    @brief  Constructor for relocation request.

    @param[in] cluster_iterator: iterator that points to the cluster in the queue.
    @param[in] closest_cluster: new closest cluster for the cluster that should be relocated.
    @param[in] closest_distance: distance to the closest cluster.
    
    */
    relocation_info(const cure_queue::iterator & cluster_iterator, cure_cluster * closest_cluster, const double closest_distance);

public:
    /*!
    
    @brief  Returns iterator to cluster in CURE queue.

    @return Iterator to cluster in CURE queue that should be relocated.

    */
    cure_queue::iterator get_cluster_iterator() const;

    /*!

    @brief  Returns distance to the closest cluster.

    @return Distance to the closest cluster.

    */
    double get_closest_distance() const;

    /*!

    @brief  Returns pointer to the closest cluster.

    @return Pointer to the closest cluster.

    */
    cure_cluster * get_closest_cluster();
};



/*!

@class   cure cure.hpp pyclustering/cluster/cure.hpp

@brief   CURE clustering algorithm that employes a hierarchical clustering algorithm that adopts a middle
          ground between the centroid-based and the all-point extremes.

@details CURE algorithm identifies clusters having non-spherical shapes and wide variances in size. CURE 
          algorithm represents each cluster by a certain fixed number of points that are generated
          by selecting well scattered points from the cluster and then shrinking them toward the center
          of the cluster by a specified fraction. Having more than one representative point per cluster
          allows CURE to adjust well to the geometry of non-spherical shapes.

Implementation based on paper @cite article::cure::1.

*/
class cure {
private:
    cure_queue * queue;

    std::size_t number_points;

    std::size_t number_clusters;

    double compression;

    const dataset   * data;

public:
    /*!
    
    @brief   Default CURE algorithm constructor.
    
    */
    cure() = default;

    /*!
    
    @brief   Constructor of CURE algorithm.
    
    @param[in] clusters_number: number of clusters that should be allocated.
    @param[in] points_number: number of representative points in each cluster.
    @param[in] level_compression: level of compression for calculation new representative points for merged cluster.
    
    */
    cure(const size_t clusters_number, const size_t points_number, const double level_compression);

    /*!
    
    @brief   Default destructor.
    
    */
    ~cure();

public:
    /*!
    
    @brief    Performs cluster analysis of an input data.
    
    @param[in]  p_data: input data for cluster analysis.
    @param[out] p_result: clustering result of an input data.
    
    */
    void process(const dataset & p_data, cure_data & p_result);
};


}

}