1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136
|
/*!
@authors Andrei Novikov (pyclustering@yandex.ru)
@date 2014-2020
@copyright BSD-3-Clause
*/
#pragma once
#include <cmath>
#include <algorithm>
#include <pyclustering/container/kdtree_balanced.hpp>
#include <pyclustering/cluster/dbscan_data.hpp>
namespace pyclustering {
namespace clst {
/*!
@brief Defines types that are used for input data representation.
*/
enum class dbscan_data_t {
POINTS, /**< Data is represented by a container of points. */
DISTANCE_MATRIX /**< Data is represented by a distance matrix between points. */
};
/*!
@class dbscan dbscan.hpp pyclustering/cluster/dbscan.hpp
@brief Represents DBSCAN clustering algorithm for cluster analysis.
@details The algorithm related to density-based class.
Implementation based on paper @cite inproceedings::dbscan::1.
*/
class dbscan {
private:
const dataset * m_data_ptr = nullptr; /* temporary pointer to input data that is used only during processing */
dbscan_data * m_result_ptr = nullptr; /* temporary pointer to clustering result that is used only during processing */
std::vector<bool> m_visited = { };
std::vector<bool> m_belong = { };
double m_initial_radius = 0.0; /* original radius that was specified by user */
size_t m_neighbors = 0;
dbscan_data_t m_type = dbscan_data_t::POINTS;
container::kdtree_balanced m_kdtree = container::kdtree_balanced();
public:
/*!
@brief Default constructor of clustering algorithm.
*/
dbscan() = default;
/*!
@brief Constructor of clustering algorithm where algorithm parameters for processing are
specified.
@param[in] p_radius_connectivity: connectivity radius between objects.
@param[in] p_minimum_neighbors: minimum amount of shared neighbors that is require to connect
two object (if distance between them is less than connectivity radius).
*/
dbscan(const double p_radius_connectivity, const size_t p_minimum_neighbors);
/*!
@brief Default destructor of the algorithm.
*/
~dbscan() = default;
public:
/*!
@brief Performs cluster analysis of an input data.
@param[in] p_data: input data (points) for cluster analysis.
@param[out] p_result: clustering result of an input data.
*/
void process(const dataset & p_data, dbscan_data & p_result);
/*!
@brief Performs cluster analysis of an input data of specific type.
@param[in] p_data: input data for cluster analysis.
@param[in] p_type: type of an input data that should be clustered.
@param[out] p_result: clustering result of an input data.
*/
void process(const dataset & p_data, const dbscan_data_t p_type, dbscan_data & p_result);
private:
/*!
@brief Obtains neighbors of the specified node (data object).
@param[in] p_index: index of the node (data object).
@param[out] p_neighbors: neighbor indexes of the specified node (data object).
*/
void get_neighbors(const size_t p_index, std::vector<size_t> & p_neighbors);
void get_neighbors_from_points(const size_t p_index, std::vector<size_t> & p_neighbors);
void get_neighbors_from_distance_matrix(const size_t p_index, std::vector<size_t> & p_neighbors);
void create_kdtree(const dataset & p_data);
void expand_cluster(const std::size_t p_index, cluster & allocated_cluster);
};
}
}
|