File: optics.hpp

package info (click to toggle)
python-pyclustering 0.10.1.2-2
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, forky, sid, trixie
  • size: 11,128 kB
  • sloc: cpp: 38,888; python: 24,311; sh: 384; makefile: 105
file content (200 lines) | stat: -rwxr-xr-x 6,225 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
/*!

@authors Andrei Novikov (pyclustering@yandex.ru)
@date 2014-2020
@copyright BSD-3-Clause

*/

#pragma once


#include <list>
#include <set>
#include <tuple>

#include <pyclustering/container/kdtree_balanced.hpp>

#include <pyclustering/cluster/optics_data.hpp>
#include <pyclustering/cluster/optics_descriptor.hpp>


namespace pyclustering {

namespace clst {


/*!

@brief Enumeration of input data type that are processed by OPTICS algorithm.

*/
enum class optics_data_t {
    POINTS,             /**< Data is represented by a container of points. */
    DISTANCE_MATRIX     /**< Data is represented by a distance matrix between points. */
};


/*!

@brief Class represents clustering algorithm OPTICS (Ordering Points To Identify Clustering Structure).
@details OPTICS is a density-based algorithm. Purpose of the algorithm is to provide explicit clusters, but create clustering-ordering representation of the input data.
          Clustering-ordering information contains information about internal structures of data set in terms of density and proper connectivity radius can be obtained
          for allocation required amount of clusters using this diagram. In case of usage additional input parameter 'amount of clusters' connectivity radius should be
          bigger than real - because it will be calculated by the algorithms.

Implementation based on paper @cite article::optics::1.

*/
class optics {
public:
    static const double       NONE_DISTANCE;    /**< Defines no distance value. */
    static const std::size_t  INVALID_INDEX;    /**< Defines incorrect index. */

private:
    struct neighbor_descriptor {
    public:
        std::size_t m_index             = INVALID_INDEX;
        double m_reachability_distance  = 0;

    public:
        neighbor_descriptor(const std::size_t p_index, const double p_distance) :
            m_index(p_index), m_reachability_distance(p_distance)
        { }
    };

    struct neighbor_descriptor_less {
    public:
        bool operator()(const neighbor_descriptor & p_object1, const neighbor_descriptor & p_object2) const {
            return p_object1.m_reachability_distance < p_object2.m_reachability_distance;
        }
    };

    using neighbors_collection = std::multiset<neighbor_descriptor, neighbor_descriptor_less>;

private:
    const dataset       * m_data_ptr        = nullptr;

    optics_data         * m_result_ptr      = nullptr;

    double              m_radius            = 0.0;

    std::size_t         m_neighbors         = 0;

    std::size_t         m_amount_clusters   = 0;

    optics_data_t       m_type              = optics_data_t::POINTS;

    container::kdtree_balanced      m_kdtree            = container::kdtree_balanced();

    optics_object_sequence *        m_optics_objects    = nullptr;

    std::list<optics_descriptor *>  m_ordered_database  = { };

public:
    /*!
    
    @brief Default constructor of the algorithm.
    
    */
    optics() = default;

    /*!
    
    @brief Default copy constructor of the algorithm.
    
    */
    optics(const optics & p_other) = default;

    /*!

    @brief Default move constructor of the algorithm.

    */
    optics(optics && p_other) = default;

    /*!

    @brief Parameterized constructor of the algorithm.

    @param[in] p_radius: connectivity radius between objects.
    @param[in] p_neighbors: minimum amount of shared neighbors that is require to connect
                two object (if distance between them is less than connectivity radius).

    */
    optics(const double p_radius, const std::size_t p_neighbors);

    /*!

    @brief Creates algorithm with specified parameters.

    @param[in] p_radius: connectivity radius between objects.
    @param[in] p_neighbors: minimum amount of shared neighbors that is require to connect
                two object (if distance between them is less than connectivity radius).
    @param[in] p_amount_clusters: amount of clusters that should be allocated (in this case
                connectivity radius may be changed by the algorithm.
    */
    optics(const double p_radius, const std::size_t p_neighbors, const std::size_t p_amount_clusters);

    /*!

    @brief Default destructor to destroy algorithm instance.

    */
    ~optics() = default;

public:
    /*!

    @brief    Performs cluster analysis of an input data.

    @param[in]  p_data: input data for cluster analysis.
    @param[out] p_result: clustering result of an input data (consists of allocated clusters,
                 cluster-ordering, noise and proper connectivity radius).

    */
    void process(const dataset & p_data, optics_data & p_result);

    /*!

    @brief    Performs cluster analysis of specific input data (points or distance matrix) that is defined by the
               `p_type` argument.

    @param[in]  p_data: input data for cluster analysis.
    @param[in]  p_type: type of input data (points or distance matrix).
    @param[out] p_result: clustering result of an input data (consists of allocated clusters,
                 cluster-ordering, noise and proper connectivity radius).

    */
    void process(const dataset & p_data, const optics_data_t p_type, optics_data & p_result);

private:
    void initialize();

    void allocate_clusters();

    void expand_cluster_order(optics_descriptor & p_object);

    void extract_clusters();

    void get_neighbors(const std::size_t p_index, neighbors_collection & p_neighbors);

    void get_neighbors_from_points(const std::size_t p_index, neighbors_collection & p_neighbors);

    void get_neighbors_from_distance_matrix(const std::size_t p_index, neighbors_collection & p_neighbors);

    double get_core_distance(const neighbors_collection & p_neighbors) const;

    void update_order_seed(const optics_descriptor & p_object, const neighbors_collection & p_neighbors, std::multiset<optics_descriptor *, optics_pointer_descriptor_less> & order_seed);

    void calculate_ordering();

    void calculate_cluster_result();

    void create_kdtree();
};


}

}