File: fcm.hpp

package info (click to toggle)
python-pyclustering 0.10.1.2-2
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, forky, sid, trixie
  • size: 11,128 kB
  • sloc: cpp: 38,888; python: 24,311; sh: 384; makefile: 105
file content (173 lines) | stat: -rwxr-xr-x 5,874 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
/*!

@authors Andrei Novikov (pyclustering@yandex.ru)
@date 2014-2020
@copyright BSD-3-Clause

*/

#pragma once


#include <pyclustering/cluster/fcm_data.hpp>


namespace pyclustering {

namespace clst {

/*!

@class      fcm fcm.hpp pyclustering/cluster/fcm.hpp

@brief      Class represents Fuzzy C-means (FCM) clustering algorithm.
@details    Fuzzy clustering is a form of clustering in which each data point can belong to more than one cluster.

Fuzzy C-Means algorithm uses two general formulas for cluster analysis. The first is to updated membership of each
point:
\f[w_{ij}=\frac{1}{\sum_{k=0}^{c}\left ( \frac{\left \| x_{i}-c_{j} \right \|}{\left \| x_{i}-c_{k} \right \|} \right )^{\frac{2}{m-1}}}\f]

The second formula is used to update centers in line with obtained centers:
\f[c_{k}=\frac{\sum_{i=0}^{N}w_{k}\left ( x_{i} \right )^{m}x_{i}}{\sum_{i=0}^{N}w_{k}\left ( x_{i} \right )^{m}}\f]

Fuzzy C-Means clustering results depend on initial centers. Algorithm K-Means++ can used for center initialization to improve
clustering quality.

Here is an example how to perform cluster analysis using Fuzzy C-Means algorithm:
@code
    int main() {
        // Read two-dimensional input data 'OldFaithful'.
        dataset data = read_data("OldFaithful.txt");

        // Prepare initial centers
        const std::size_t amount_clusters = 2;
        const std::size_t candidates = 5;

        dataset initial_centers;
        kmeans_plus_plus(amount_clusters, candidates).initialize(data, initial_centers);

        // Create and run FCM clustering algorithm.
        fcm_data result;
        fcm(initial_centers).process(data, result);

        // Obtain clustering results.
        const cluster_sequence & clusters = result.clusters();
        const dataset & centers = result.centers();
        const membership_sequence & membership = result.membership();

        // Display points which have membership probability less than 90%.
        std::cout << "Points that have membership probability less than 90%: ";
        for (std::size_t i = 0; i < membership.size(); i++) {
            if (membership[i][0] > 0.1 && membership[i][0] < 0.9) {
                std::cout << i << " ";
            }
        }
        std::cout << std::endl;

        return 0;
    }
@endcode

The next example shows how to perform image segmentation using Fuzzy C-Means algorithm:
@code
    // Read image (photo), for example, using OpenCV2 or any other library.
    dataset data = read_image("stpetersburg_admiral.jpg");

    // Prepare initial centers
    const std::size_t amount_segments = 3;
    const std::size_t candidates = kmeans_plus_plus::FARTHEST_CENTER_CANDIDATE;

    dataset initial_centers;
    kmeans_plus_plus(amount_segments, candidates).initialize(data, initial_centers);

    // Create and run FCM clustering algorithm to extract color segments from the image.
    fcm_data result;
    fcm(initial_centers).process(data, result);
@endcode

@image html fcm_segmentation_stpetersburg.png "Image segmentation using Fuzzy C-Means algorithm."

Visualization has been done using Python version of pyclustering library.

*/
class fcm {
public:
    const static double             DEFAULT_TOLERANCE;          /**< Default value of the tolerance stop condition: if maximum value of change of centers of clusters is less than tolerance then algorithm stops processing. */

    const static std::size_t        DEFAULT_ITERMAX;            /**< Default value of the step stop condition - maximum number of iterations that is used for clustering process. */

    const static double             DEFAULT_HYPER_PARAMETER;    /**< Default value of hyper-parameter that controls how fuzzy the cluster will be. */

private:
    double          m_tolerance             = DEFAULT_TOLERANCE;

    std::size_t     m_itermax               = DEFAULT_ITERMAX;

    dataset         m_initial_centers       = { };

    double          m_degree                = 0.0;

    fcm_data        * m_ptr_result          = nullptr;      /* temporary pointer to output result */

    const dataset   * m_ptr_data            = nullptr;      /* used only during processing */

public:
    /*!
    
    @brief  Default constructor of FCM clustering algorithm.
    
    */
    fcm() = default;

    /*!
    
    @brief  Constructor of FCM clustering algorithm with specific parameters.

    @param[in] p_initial_centers: initial centers for clusters.
    @param[in] p_m: hyper-parameter that controls how fuzzy the cluster will be; the higher it is, the fuzzier the cluster will be in the end.
    @param[in] p_tolerance: stop condition value: if maximum value of change of centers of clusters is less 
                than tolerance then algorithm stops processing.
    @param[in] p_itermax: maximum number of iterations that is used for clustering process.
    
    */
    fcm(const dataset & p_initial_centers, 
        const double p_m = DEFAULT_HYPER_PARAMETER,
        const double p_tolerance = DEFAULT_TOLERANCE,
        const std::size_t p_itermax = DEFAULT_ITERMAX);

    /*!

    @brief  Default destructor of FCM clustering algorithm.

    */
    ~fcm() = default;

public:
    /*!

    @brief    Performs cluster analysis of an input data.

    @param[in]  p_data: input data for cluster analysis.
    @param[out] p_result: FCM clustering result of an input data.

    */
    void process(const dataset & p_data, fcm_data & p_result);

private:
    void verify() const;

    double update_centers();

    double update_center(const std::size_t p_index);

    void update_membership();

    void update_point_membership(const std::size_t p_index);

    void extract_clusters(cluster_sequence & p_clusters);
};


}

}