1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191
|
/*!
@authors Andrei Novikov (pyclustering@yandex.ru)
@date 2014-2020
@copyright BSD-3-Clause
*/
#pragma once
#include <vector>
#include <pyclustering/cluster/cluster_data.hpp>
#include <pyclustering/definitions.hpp>
namespace pyclustering {
namespace clst {
/*!
@brief A storage where Agglometative clustering results are stored.
*/
using agglomerative_data = cluster_data;
/*!
@class agglomerative agglomerative.hpp pyclustering/cluster/agglomerative.hpp
@brief Agglomerative algorithm implementation that is used bottom up approach for clustering.
@details Agglomerative algorithm considers each data point (object) as a separate cluster at the beginning and
step by step finds the best pair of clusters for merge until required amount of clusters is obtained.
Example of agglomerative algorithm where centroid link 'CENTROID_LINK' is used for clustering sample 'Simple01':
@code
using namespace pyclustering;
using namespace pyclustering::clst;
int main() {
// Read an input data 'Simple01' from text file.
dataset data = read_data("Simple01.txt");
// Create storage where the result is going to be stored.
agglomerative_data result;
// Create the algorithm and process the input data using centroid link.
agglomerative(2, agglomerative::type_link::CENTROID_LINK).process(data, result);
// Display allocated clusters.
for (auto group : result.clusters()) {
std::cout << "[ ";
for (auto index : group) { std::cout << index << " "; }
std::cout << "]";
}
return 0;
}
@endcode
Example of 'Lsun' clustering by the algorithm where single link method is more suitable due to elongated clusters:
@code
dataset data = read_data("Lsun.txt");
agglomerative_data result;
agglomerative(2, agglomerative::type_link::SINGLE_LINK).process(data, result);
@endcode
There is an illustration how various methods affect the clustering result:
@image html agglomerative_lsun_clustering_single_link.png
Implementation based on paper @cite book::algorithms_for_clustering_data.
*/
class agglomerative {
public:
/*!
@brief Defines methods (how to define closest clusters) for Agglomerative clustering.
*/
enum class type_link {
SINGLE_LINK = 0, /**< Distance between the two nearest objects in clusters is considered as a link, so-called SLINK method (the single-link clustering method). */
COMPLETE_LINK = 1, /**< Distance between the farthest objects in clusters is considered as a link, so-called CLINK method (the complete-link clustering method). */
AVERAGE_LINK = 2, /**< Average distance between objects in clusters is considered as a link. */
CENTROID_LINK = 3 /**< Distance between centers of clusters is considered as a link. */
};
private:
size_t m_number_clusters;
type_link m_similarity;
dataset m_centers;
cluster_sequence * m_ptr_clusters;
const dataset * m_ptr_data;
public:
/*!
@brief Default constructor of the clustering algorithm.
*/
agglomerative();
/*!
@brief Constructor of clustering algorithm where algorithm parameters for processing are
specified.
@param[in] number_clusters: amount of clusters that should be allocated.
@param[in] link: type of the linking method for clustering.
*/
agglomerative(const size_t number_clusters, const type_link link);
/*!
@brief Default destructor of the algorithm.
*/
~agglomerative() = default;
public:
/*!
@brief Performs cluster analysis of an input data.
@param[in] p_data: an input data that should be clusted.
@param[out] p_result: agglomerative clustering result of an input data.
*/
void process(const dataset & p_data, agglomerative_data & p_result);
private:
/*!
@brief Merges the most similar clusters in line with link type.
*/
void merge_similar_clusters();
/*!
@brief Merges the most similar clusters in line with average link type.
*/
void merge_by_average_link();
/*!
@brief Merges the most similar clusters in line with centroid link type.
*/
void merge_by_centroid_link();
/*!
@brief Merges the most similar clusters in line with complete link type.
*/
void merge_by_complete_link();
/*!
@brief Merges the most similar clusters in line with single link type.
*/
void merge_by_signle_link();
/*!
@brief Calculates new center.
@param[in] cluster: cluster whose center should be calculated.
@param[out] center: coordinates of the cluster center.
*/
void calculate_center(const cluster & cluster, point & center) const;
};
}
}
|