File: random_center_initializer.hpp

package info (click to toggle)
python-pyclustering 0.10.1.2-2
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, forky, sid, trixie
  • size: 11,128 kB
  • sloc: cpp: 38,888; python: 24,311; sh: 384; makefile: 105
file content (142 lines) | stat: -rwxr-xr-x 3,663 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
/*!

@authors Andrei Novikov (pyclustering@yandex.ru)
@date 2014-2020
@copyright BSD-3-Clause

*/


#pragma once


#include <random>
#include <unordered_set>

#include <pyclustering/definitions.hpp>

#include <pyclustering/cluster/center_initializer.hpp>
#include <pyclustering/cluster/cluster_data.hpp>

#include <pyclustering/utils/metric.hpp>


// NOTE(review): this using-directive sits at header scope, so every file that
// includes this header also gets all names of pyclustering::utils::metric
// injected into its global lookup. It is not evident from this header which
// names (if any) depend on it; other sources may rely on it transitively -
// confirm before narrowing or removing it.
using namespace pyclustering::utils::metric;


namespace pyclustering {

namespace clst {


/*!

@class   random_center_initializer random_center_initializer.hpp pyclustering/cluster/random_center_initializer.hpp

@brief   Random center initializer generates the specified amount of random centers for the specified data.

*/
class random_center_initializer : public center_initializer {
private:
    /**
     *
     * @brief Storage where indexes are stored.
     *
     */
    using index_storage = std::unordered_set<std::size_t>;

private:
    /**
     *
     * @brief Amount of centers to produce (zero for a default-constructed instance).
     *        Presumably assigned from `p_amount` by the constructor that is defined outside this header.
     *
     */
    std::size_t             m_amount            = 0;

    /**
     *
     * @brief Seed for the random state; `RANDOM_STATE_CURRENT_TIME` unless another value is supplied.
     *
     */
    long long               m_random_state      = RANDOM_STATE_CURRENT_TIME;

    /**
     *
     * @brief Random number engine of the initializer. It is `mutable` so that the `const`
     *        methods `initialize` and `create_center` are allowed to modify it.
     *
     */
    mutable std::mt19937    m_generator;

    /**
     *
     * @brief Index storage that `const` methods are allowed to modify (`mutable`).
     *        Presumably holds the point indexes that may still be chosen as centers - confirm against the implementation.
     *
     */
    mutable index_storage   m_available_indexes = { };

public:
    /**
     *
     * @brief Default constructor to create random center initializer.
     *
     */
    random_center_initializer() = default;

    /**
     *
     * @brief    Constructor of random center initializer.
     *
     * @param[in] p_amount: amount of centers that should be initialized.
     * @param[in] p_random_state: seed for random state (by default is `RANDOM_STATE_CURRENT_TIME`, current system time is used).
     *
     */
    explicit random_center_initializer(const std::size_t p_amount, const long long p_random_state = RANDOM_STATE_CURRENT_TIME);

    /**
     *
     * @brief Default copy constructor to create random center initializer.
     *
     */
    random_center_initializer(const random_center_initializer & p_other) = default;

    /**
     *
     * @brief Default move constructor to create random center initializer.
     *
     */
    random_center_initializer(random_center_initializer && p_other) = default;

    /**
     *
     * @brief Default copy assignment operator. Declared explicitly because the copy constructor and
     *        destructor are user-declared (Rule of Five).
     *
     */
    random_center_initializer & operator=(const random_center_initializer & p_other) = default;

    /**
     *
     * @brief Default move assignment operator. Declared explicitly because it is not generated implicitly
     *        once a copy constructor or destructor is user-declared; without it move-assignment copies.
     *
     */
    random_center_initializer & operator=(random_center_initializer && p_other) = default;

    /**
     *
     * @brief Default destructor to destroy random center initializer.
     *
     */
    ~random_center_initializer() = default;

public:
    /**
    *
    * @brief    Performs center initialization process in line with the algorithm configuration.
    *
    * @param[in]  p_data: data for which centers are calculated.
    * @param[out] p_centers: initialized centers for the specified data.
    *
    */
    void initialize(const dataset & p_data, dataset & p_centers) const override;

    /**
    *
    * @brief    Performs center initialization process in line with the algorithm configuration for
    *           a specific range of points.
    *
    * @param[in]  p_data: data for which centers are calculated.
    * @param[in]  p_indexes: point indexes from data that define which points should be considered
    *              during calculation process. If empty then all data points are considered.
    * @param[out] p_centers: initialized centers for the specified data.
    *
    */
    void initialize(const dataset & p_data, const index_sequence & p_indexes, dataset & p_centers) const override;

private:
    /**
    *
    * @brief    Creates a random center and places it into the specified storage.
    *
    * @param[in]  p_data: data for which centers are calculated.
    * @param[out] p_centers: storage where the new center should be placed.
    *
    */
    void create_center(const dataset & p_data, dataset & p_centers) const;

    /**
    *
    * @brief    Assigns seed to the random generator that is used by the algorithm.
    *
    */
    void initialize_random_generator();
};


}

}