1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200
|
/*!
@authors Andrei Novikov (pyclustering@yandex.ru)
@date 2014-2020
@copyright BSD-3-Clause
*/
#pragma once
#include <list>
#include <set>
#include <tuple>
#include <pyclustering/container/kdtree_balanced.hpp>
#include <pyclustering/cluster/optics_data.hpp>
#include <pyclustering/cluster/optics_descriptor.hpp>
namespace pyclustering {
namespace clst {
/*!
@brief Enumeration of input data type that are processed by OPTICS algorithm.
*/
enum class optics_data_t {
POINTS, /**< Data is represented by a container of points. */
DISTANCE_MATRIX /**< Data is represented by a distance matrix between points. */
};
/*!
@brief Class represents clustering algorithm OPTICS (Ordering Points To Identify Clustering Structure).
@details OPTICS is a density-based algorithm. Purpose of the algorithm is to provide explicit clusters, but create clustering-ordering representation of the input data.
Clustering-ordering information contains information about internal structures of data set in terms of density and proper connectivity radius can be obtained
for allocation required amount of clusters using this diagram. In case of usage additional input parameter 'amount of clusters' connectivity radius should be
bigger than real - because it will be calculated by the algorithms.
Implementation based on paper @cite article::optics::1.
*/
class optics {
public:
static const double NONE_DISTANCE; /**< Defines no distance value. */
static const std::size_t INVALID_INDEX; /**< Defines incorrect index. */
private:
struct neighbor_descriptor {
public:
std::size_t m_index = INVALID_INDEX;
double m_reachability_distance = 0;
public:
neighbor_descriptor(const std::size_t p_index, const double p_distance) :
m_index(p_index), m_reachability_distance(p_distance)
{ }
};
struct neighbor_descriptor_less {
public:
bool operator()(const neighbor_descriptor & p_object1, const neighbor_descriptor & p_object2) const {
return p_object1.m_reachability_distance < p_object2.m_reachability_distance;
}
};
using neighbors_collection = std::multiset<neighbor_descriptor, neighbor_descriptor_less>;
private:
const dataset * m_data_ptr = nullptr;
optics_data * m_result_ptr = nullptr;
double m_radius = 0.0;
std::size_t m_neighbors = 0;
std::size_t m_amount_clusters = 0;
optics_data_t m_type = optics_data_t::POINTS;
container::kdtree_balanced m_kdtree = container::kdtree_balanced();
optics_object_sequence * m_optics_objects = nullptr;
std::list<optics_descriptor *> m_ordered_database = { };
public:
/*!
@brief Default constructor of the algorithm.
*/
optics() = default;
/*!
@brief Default copy constructor of the algorithm.
*/
optics(const optics & p_other) = default;
/*!
@brief Default move constructor of the algorithm.
*/
optics(optics && p_other) = default;
/*!
@brief Parameterized constructor of the algorithm.
@param[in] p_radius: connectivity radius between objects.
@param[in] p_neighbors: minimum amount of shared neighbors that is require to connect
two object (if distance between them is less than connectivity radius).
*/
optics(const double p_radius, const std::size_t p_neighbors);
/*!
@brief Creates algorithm with specified parameters.
@param[in] p_radius: connectivity radius between objects.
@param[in] p_neighbors: minimum amount of shared neighbors that is require to connect
two object (if distance between them is less than connectivity radius).
@param[in] p_amount_clusters: amount of clusters that should be allocated (in this case
connectivity radius may be changed by the algorithm.
*/
optics(const double p_radius, const std::size_t p_neighbors, const std::size_t p_amount_clusters);
/*!
@brief Default destructor to destroy algorithm instance.
*/
~optics() = default;
public:
/*!
@brief Performs cluster analysis of an input data.
@param[in] p_data: input data for cluster analysis.
@param[out] p_result: clustering result of an input data (consists of allocated clusters,
cluster-ordering, noise and proper connectivity radius).
*/
void process(const dataset & p_data, optics_data & p_result);
/*!
@brief Performs cluster analysis of specific input data (points or distance matrix) that is defined by the
`p_type` argument.
@param[in] p_data: input data for cluster analysis.
@param[in] p_type: type of input data (points or distance matrix).
@param[out] p_result: clustering result of an input data (consists of allocated clusters,
cluster-ordering, noise and proper connectivity radius).
*/
void process(const dataset & p_data, const optics_data_t p_type, optics_data & p_result);
private:
void initialize();
void allocate_clusters();
void expand_cluster_order(optics_descriptor & p_object);
void extract_clusters();
void get_neighbors(const std::size_t p_index, neighbors_collection & p_neighbors);
void get_neighbors_from_points(const std::size_t p_index, neighbors_collection & p_neighbors);
void get_neighbors_from_distance_matrix(const std::size_t p_index, neighbors_collection & p_neighbors);
double get_core_distance(const neighbors_collection & p_neighbors) const;
void update_order_seed(const optics_descriptor & p_object, const neighbors_collection & p_neighbors, std::multiset<optics_descriptor *, optics_pointer_descriptor_less> & order_seed);
void calculate_ordering();
void calculate_cluster_result();
void create_kdtree();
};
}
}
|