1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130
|
/*!
@authors Andrei Novikov (pyclustering@yandex.ru)
@date 2014-2020
@copyright BSD-3-Clause
*/
#pragma once
#include <unordered_set>
#include <pyclustering/cluster/bsas.hpp>
#include <pyclustering/cluster/ttsas_data.hpp>
#include <pyclustering/utils/metric.hpp>
using namespace pyclustering::utils::metric;
namespace pyclustering {
namespace clst {
/*!
@class ttsas ttsas.hpp pyclustering/cluster/ttsas.hpp
@brief Class represents TTSAS (Two-Threshold Sequential Algorithmic Scheme).
@details Clustering results of BSAS and MBSAS are strongly dependent on the order in which the points are presented in the data.
TTSAS helps to overcome this shortcoming by using two threshold parameters. The first: if the distance
to the nearest cluster is less than the first threshold, then the point is assigned to that cluster. The
second: if the distance to the nearest cluster is greater than the second threshold, then a new cluster is
allocated.
Code example of TTSAS usage:
@code
#include <pyclustering/cluster/ttsas.hpp>
#include <iostream>
// ... `read_data` implementation to read sample ...
int main() {
// Read two-dimensional input data 'Simple03'.
dataset data = read_data("Simple03.txt");
// Prepare parameters for TTSAS algorithm.
const double threshold1 = 1.0;
const double threshold2 = 2.0;
// Create TTSAS algorithm and perform cluster analysis.
ttsas ttsas_instance = ttsas(threshold1, threshold2);
ttsas_data clustering_result;
ttsas_instance.process(data, clustering_result);
// Obtain allocated clusters.
const auto & clusters = clustering_result.clusters();
// Print result.
std::cout << "Amount of allocated clusters: " << clusters.size() << std::endl;
return 0;
}
@endcode
Implementation based on paper @cite book::pattern_recognition::2009.
*/
class ttsas : public bsas {
private:
    /*! Temporary pointer to the input data - exists only during processing. */
    const dataset * m_data_ptr = nullptr;

    /*! Second threshold: a distance greater than this value leads to allocation of a new cluster (see constructor). */
    double m_threshold2 = 0.0;

    /*! Per-point flags used while processing. NOTE(review): presumably marks points whose assignment was deferred - confirm against the implementation. */
    std::vector<bool> m_skipped_objects = { };

    /*!
    Start index used while processing. NOTE(review): exact meaning is defined by the implementation - confirm.
    Explicitly zero-initialized so that a default-constructed instance never holds an indeterminate value.
    */
    std::size_t m_start = 0;

public:
    /*!
    @brief Default TTSAS constructor.
    */
    ttsas() = default;

    /*!
    @brief TTSAS constructor with specific parameters.
    @param[in] p_threshold1: dissimilarity level (distance) between point and its closest cluster, if the distance is
                less than `threshold1` value then point is assigned to the cluster.
    @param[in] p_threshold2: dissimilarity level (distance) between point and its closest cluster, if the distance is
                greater than `threshold2` value then point is considered as a new cluster.
    @param[in] p_metric: metric that is used for distance calculation between two points.
    */
    ttsas(const double p_threshold1,
          const double p_threshold2,
          const distance_metric<point> & p_metric = distance_metric_factory<point>::euclidean());

public:
    /*!
    @brief Performs cluster analysis of an input data.
    @param[in] p_data: input data for cluster analysis.
    @param[out] p_result: TTSAS clustering result of an input data.
    */
    virtual void process(const dataset & p_data, ttsas_data & p_result) override;

private:
    /*!
    @brief Internal processing step over the input points.
    @param[in] p_changes: NOTE(review): presumably a counter of changes made so far - confirm against the implementation.
    */
    void process_objects(const std::size_t p_changes);

    /*!
    @brief Internal processing step for a single point that was previously skipped.
    @param[in] p_index_point: index of the point in the input data.
    */
    void process_skipped_object(const std::size_t p_index_point);

    /*!
    @brief Appends a point to an existing cluster.
    @param[in] p_index_cluster: index of the cluster that receives the point.
    @param[in] p_index_point: index of the point in the input data.
    @param[in] p_point: the point itself.
    */
    void append_to_cluster(const std::size_t p_index_cluster, const std::size_t p_index_point, const point & p_point);

    /*!
    @brief Allocates a new cluster for a point.
    @param[in] p_index_point: index of the point in the input data.
    @param[in] p_point: the point itself.
    */
    void allocate_cluster(const std::size_t p_index_point, const point & p_point);
};
}
}
|