File: dbscan.hpp

package info (click to toggle)
python-pyclustering 0.10.1.2-2
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, forky, trixie
  • size: 11,128 kB
  • sloc: cpp: 38,888; python: 24,311; sh: 384; makefile: 105
file content (136 lines) | stat: -rwxr-xr-x 3,761 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
/*!

@authors Andrei Novikov (pyclustering@yandex.ru)
@date 2014-2020
@copyright BSD-3-Clause

*/

#pragma once


#include <cmath>
#include <algorithm>

#include <pyclustering/container/kdtree_balanced.hpp>

#include <pyclustering/cluster/dbscan_data.hpp>


namespace pyclustering {

namespace clst {


/*!

@brief Defines types that are used for input data representation.

*/
enum class dbscan_data_t {
    POINTS,             /**< Data is represented by a container of points. */
    DISTANCE_MATRIX     /**< Data is represented by a distance matrix between points. */
};


/*!

@class    dbscan dbscan.hpp pyclustering/cluster/dbscan.hpp

@brief    Represents DBSCAN clustering algorithm for cluster analysis.
@details  The algorithm related to density-based class.

Implementation based on paper @cite inproceedings::dbscan::1.

*/
class dbscan {
private:
    const dataset *            m_data_ptr      = nullptr;  /* temporary pointer to input data that is used only during processing */

    dbscan_data *              m_result_ptr    = nullptr;  /* temporary pointer to clustering result that is used only during processing */

    std::vector<bool>          m_visited         = { };

    std::vector<bool>          m_belong          = { };

    double                     m_initial_radius  = 0.0;    /* original radius that was specified by user */

    size_t                     m_neighbors       = 0;

    dbscan_data_t              m_type            = dbscan_data_t::POINTS;

    container::kdtree_balanced m_kdtree = container::kdtree_balanced();

public:
    /*!
    
    @brief    Default constructor of clustering algorithm.
    
    */
    dbscan() = default;

    /*!
    
    @brief    Constructor of clustering algorithm where algorithm parameters for processing are
               specified.
    
    @param[in] p_radius_connectivity: connectivity radius between objects.
    @param[in] p_minimum_neighbors: minimum amount of shared neighbors that is require to connect
                two object (if distance between them is less than connectivity radius).
    
    */
    dbscan(const double p_radius_connectivity, const size_t p_minimum_neighbors);

    /*!
    
    @brief    Default destructor of the algorithm.
    
    */
    ~dbscan() = default;

public:
    /*!
    
    @brief    Performs cluster analysis of an input data.
    
    @param[in]  p_data: input data (points) for cluster analysis.
    @param[out] p_result: clustering result of an input data.
    
    */
    void process(const dataset & p_data, dbscan_data & p_result);

    /*!
    
    @brief    Performs cluster analysis of an input data of specific type.
    
    @param[in]  p_data: input data for cluster analysis.
    @param[in]  p_type: type of an input data that should be clustered.
    @param[out] p_result: clustering result of an input data.
    
    */
    void process(const dataset & p_data, const dbscan_data_t p_type, dbscan_data & p_result);

private:
    /*!
    
    @brief    Obtains neighbors of the specified node (data object).
    
    @param[in]  p_index: index of the node (data object).
    @param[out] p_neighbors: neighbor indexes of the specified node (data object).
    
    */
    void get_neighbors(const size_t p_index, std::vector<size_t> & p_neighbors);

    void get_neighbors_from_points(const size_t p_index, std::vector<size_t> & p_neighbors);

    void get_neighbors_from_distance_matrix(const size_t p_index, std::vector<size_t> & p_neighbors);

    void create_kdtree(const dataset & p_data);

    void expand_cluster(const std::size_t p_index, cluster & allocated_cluster);
};


}

}