File: sampling_templates.py

package info (click to toggle)
python-pyclustering 0.10.1.2-2
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, forky, sid, trixie
  • size: 11,128 kB
  • sloc: cpp: 38,888; python: 24,311; sh: 384; makefile: 105
file content (43 lines) | stat: -rwxr-xr-x 1,191 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
"""!

@brief Test templates for sampling module.

@authors Andrei Novikov (pyclustering@yandex.ru)
@date 2014-2020
@copyright BSD-3-Clause

"""

import collections

from scipy import stats

from pyclustering.tests.assertion import assertion



class sampling_test_template:
    @staticmethod
    def random_sampling(data, n, algorithm, repeat, ccore=True):
        for _ in range(repeat):
            sample = algorithm(data, n)
            unique_values = set(sample)

            assertion.eq(n, len(sample))
            assertion.eq(len(unique_values), len(sample))


    @staticmethod
    def uniform_distribution(data, n, algorithm, repeat, supremum_cdf=0.06, ccore=True):
        # Supremum CDF < 0.06 is almost about uniform distribution (for R algorithm).
        # Supremum CDF < 0.4 is for X algorithm
        min_value = min(data)
        max_value = max(data)
        scale = max_value - min_value

        stream = collections.deque()
        for _ in range(repeat):
            stream += algorithm(data, n)

        D, pvalue = stats.kstest(stream, stats.uniform(loc=min_value, scale=scale).cdf)
        assertion.gt(supremum_cdf, D)