File: silhouette_templates.py

package info (click to toggle)
python-pyclustering 0.10.1.2-2
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, forky, sid, trixie
  • size: 11,128 kB
  • sloc: cpp: 38,888; python: 24,311; sh: 384; makefile: 105
file content (106 lines) | stat: -rwxr-xr-x 3,952 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
"""!

@brief Test templates for Silhouette clustering module.

@authors Andrei Novikov (pyclustering@yandex.ru)
@date 2014-2020
@copyright BSD-3-Clause

"""

import math

from pyclustering.cluster.silhouette import silhouette, silhouette_ksearch

from pyclustering.samples import answer_reader
from pyclustering.samples.definitions import SIMPLE_SAMPLES

from pyclustering.tests.assertion import assertion

from pyclustering.utils import read_sample, calculate_distance_matrix, distance_metric, type_metric


class silhouette_test_template:
    """!
    @brief Collection of static checks shared by tests of 'silhouette' and 'silhouette_ksearch'.

    """

    @staticmethod
    def correct_scores(sample_path, answer_path, ccore_flag, **kwargs):
        """!
        @brief Calculates silhouette scores for a sample and checks that there is one score per
                sample item and that every score lies in range [-1.0, 1.0].

        @param[in] sample_path (string): Path to the sample that is loaded by 'read_sample'.
        @param[in] answer_path (string): Path to the answer file whose clusters are scored.
        @param[in] ccore_flag (bool): Value forwarded to 'silhouette' as argument 'ccore'.
        @param[in] **kwargs: Optional argument 'data_type' ('points' by default); when it is
                    'distance_matrix' the sample is converted to a distance matrix built with
                    'type_metric.EUCLIDEAN_SQUARE' before scoring.

        @return Scores obtained from 'get_score()' of the processed silhouette instance.

        """
        data_type = kwargs.get('data_type', 'points')

        data = read_sample(sample_path)
        if data_type == 'distance_matrix':
            metric = distance_metric(type_metric.EUCLIDEAN_SQUARE)
            data = calculate_distance_matrix(data, metric)

        reference_clusters = answer_reader(answer_path).get_clusters()

        analyser = silhouette(data, reference_clusters, ccore=ccore_flag, data_type=data_type)
        scores = analyser.process().get_score()

        assertion.eq(len(data), len(scores))
        for value in scores:
            assertion.le(-1.0, value)
            assertion.ge(1.0, value)

        return scores


    @staticmethod
    def correct_processing_data_types(sample_path, answer_path, ccore_flag):
        """!
        @brief Checks that scores calculated from points are equal to scores calculated from
                a distance matrix of the same sample.

        @param[in] sample_path (string): Path to the sample, forwarded to 'correct_scores'.
        @param[in] answer_path (string): Path to the answer file, forwarded to 'correct_scores'.
        @param[in] ccore_flag (bool): Value forwarded to 'correct_scores'.

        """
        # Evaluation order is kept: points first, distance matrix second.
        scores_points, scores_matrix = [
            silhouette_test_template.correct_scores(sample_path, answer_path, ccore_flag, data_type=kind)
            for kind in ('points', 'distance_matrix')
        ]

        assertion.eq(len(scores_points), len(scores_matrix))
        assertion.eq(scores_points, scores_matrix)


    @staticmethod
    def correct_ksearch(sample_path, answer_path, kmin, kmax, algorithm, ccore_flag):
        """!
        @brief Runs silhouette K-search up to 15 times and requires that at least one run reports
                an amount of clusters that differs from the answer by no more than one.

        @details Each performed run is also checked: the best score must lie in range [-1.0, 1.0]
                  and the amount of collected scores must be equal to 'kmax - kmin'.

        @param[in] sample_path (string): Path to the sample that is loaded by 'read_sample'.
        @param[in] answer_path (string): Path to the answer file with the reference clusters.
        @param[in] kmin (uint): Value forwarded to 'silhouette_ksearch' as 'kmin'.
        @param[in] kmax (uint): Value forwarded to 'silhouette_ksearch' as 'kmax'.
        @param[in] algorithm: Value forwarded to 'silhouette_ksearch' as argument 'algorithm'.
        @param[in] ccore_flag (bool): Value forwarded to 'silhouette_ksearch' as argument 'ccore'.

        """
        attempts = 15

        sample = read_sample(sample_path)
        expected_amount = len(answer_reader(answer_path).get_clusters())

        # Acceptable window around the reference amount; the lower bound never drops below 1.
        upper_limit = expected_amount + 1
        lower_limit = max(expected_amount - 1, 1)

        def attempt_is_acceptable():
            # Single K-search run: validate its output and report whether the amount fits the window.
            outcome = silhouette_ksearch(sample, kmin, kmax, algorithm=algorithm, ccore=ccore_flag).process()
            amount = outcome.get_amount()
            best_score = outcome.get_score()
            collected_scores = outcome.get_scores()

            assertion.le(-1.0, best_score)
            assertion.ge(1.0, best_score)
            assertion.eq(kmax - kmin, len(collected_scores))

            return not ((amount > upper_limit) or (amount < lower_limit))

        # 'any' stops at the first acceptable run, so no further attempts are made after a success.
        assertion.true(any(attempt_is_acceptable() for _ in range(attempts)))


    @staticmethod
    def random_state(kmin, kmax, algorithm, random_state, ccore_flag):
        """!
        @brief Checks that two K-search runs on 'SAMPLE_SIMPLE4' with the same 'random_state'
                produce the same amount of clusters, the same best score and the same scores.

        @param[in] kmin (uint): Value forwarded to 'silhouette_ksearch' as 'kmin'.
        @param[in] kmax (uint): Value forwarded to 'silhouette_ksearch' as 'kmax'.
        @param[in] algorithm: Value forwarded to 'silhouette_ksearch' as argument 'algorithm'.
        @param[in] random_state: Value forwarded to 'silhouette_ksearch' as argument 'random_state'.
        @param[in] ccore_flag (bool): Value forwarded to 'silhouette_ksearch' as argument 'ccore'.

        """
        sample = read_sample(SIMPLE_SAMPLES.SAMPLE_SIMPLE4)

        # Two identically configured runs, performed one after another.
        first_run, second_run = [
            silhouette_ksearch(sample, kmin, kmax, algorithm=algorithm, random_state=random_state,
                               ccore=ccore_flag).process()
            for _ in range(2)
        ]

        assertion.eq(first_run.get_amount(), second_run.get_amount())
        assertion.eq(first_run.get_score(), second_run.get_score())

        first_scores = first_run.get_scores()
        second_scores = second_run.get_scores()
        assertion.eq(len(first_scores), len(second_scores))

        for raw_key in first_scores:
            k = int(raw_key)
            left, right = first_scores[k], second_scores[k]

            # NaN is never equal to itself, so a pair of NaN values is treated as a match.
            if not (math.isnan(left) and math.isnan(right)):
                assertion.eq(left, right)