File: sklearn/metrics/cluster/tests/test_supervised.py (scikit-learn 1.2.1)

import warnings

import numpy as np
import pytest

from sklearn.metrics.cluster import adjusted_mutual_info_score
from sklearn.metrics.cluster import adjusted_rand_score
from sklearn.metrics.cluster import rand_score
from sklearn.metrics.cluster import completeness_score
from sklearn.metrics.cluster import contingency_matrix
from sklearn.metrics.cluster import pair_confusion_matrix
from sklearn.metrics.cluster import entropy
from sklearn.metrics.cluster import expected_mutual_information
from sklearn.metrics.cluster import fowlkes_mallows_score
from sklearn.metrics.cluster import homogeneity_completeness_v_measure
from sklearn.metrics.cluster import homogeneity_score
from sklearn.metrics.cluster import mutual_info_score
from sklearn.metrics.cluster import normalized_mutual_info_score
from sklearn.metrics.cluster import v_measure_score
from sklearn.metrics.cluster._supervised import _generalized_average
from sklearn.metrics.cluster._supervised import check_clusterings

from sklearn.utils import assert_all_finite
from sklearn.utils._testing import assert_almost_equal
from numpy.testing import (
    assert_allclose,
    assert_array_almost_equal,
    assert_array_equal,
)


score_funcs = [
    adjusted_rand_score,
    rand_score,
    homogeneity_score,
    completeness_score,
    v_measure_score,
    adjusted_mutual_info_score,
    normalized_mutual_info_score,
]


def test_error_messages_on_wrong_input():
    for score_func in score_funcs:
        expected = (
            r"Found input variables with inconsistent numbers " r"of samples: \[2, 3\]"
        )
        with pytest.raises(ValueError, match=expected):
            score_func([0, 1], [1, 1, 1])

        expected = r"labels_true must be 1D: shape is \(2"
        with pytest.raises(ValueError, match=expected):
            score_func([[0, 1], [1, 0]], [1, 1, 1])

        expected = r"labels_pred must be 1D: shape is \(2"
        with pytest.raises(ValueError, match=expected):
            score_func([0, 1, 0], [[1, 1], [0, 0]])


def test_generalized_average():
    a, b = 1, 2
    methods = ["min", "geometric", "arithmetic", "max"]
    means = [_generalized_average(a, b, method) for method in methods]
    assert means[0] <= means[1] <= means[2] <= means[3]
    c, d = 12, 12
    means = [_generalized_average(c, d, method) for method in methods]
    assert means[0] == means[1] == means[2] == means[3]
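

# What the four averaging methods compute, assuming the standard definitions
# (a sketch for readers; _generalized_average is private API):
#
#     "min"        -> min(a, b)
#     "geometric"  -> sqrt(a * b)
#     "arithmetic" -> (a + b) / 2
#     "max"        -> max(a, b)
#
# The chain min <= geometric <= arithmetic <= max asserted above is the AM-GM
# inequality plus the trivial min/max bounds; all four coincide when a == b.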


def test_perfect_matches():
    for score_func in score_funcs:
        assert score_func([], []) == pytest.approx(1.0)
        assert score_func([0], [1]) == pytest.approx(1.0)
        assert score_func([0, 0, 0], [0, 0, 0]) == pytest.approx(1.0)
        assert score_func([0, 1, 0], [42, 7, 42]) == pytest.approx(1.0)
        assert score_func([0.0, 1.0, 0.0], [42.0, 7.0, 42.0]) == pytest.approx(1.0)
        assert score_func([0.0, 1.0, 2.0], [42.0, 7.0, 2.0]) == pytest.approx(1.0)
        assert score_func([0, 1, 2], [42, 7, 2]) == pytest.approx(1.0)
    score_funcs_with_changing_means = [
        normalized_mutual_info_score,
        adjusted_mutual_info_score,
    ]
    means = {"min", "geometric", "arithmetic", "max"}
    for score_func in score_funcs_with_changing_means:
        for mean in means:
            assert score_func([], [], average_method=mean) == pytest.approx(1.0)
            assert score_func([0], [1], average_method=mean) == pytest.approx(1.0)
            assert score_func(
                [0, 0, 0], [0, 0, 0], average_method=mean
            ) == pytest.approx(1.0)
            assert score_func(
                [0, 1, 0], [42, 7, 42], average_method=mean
            ) == pytest.approx(1.0)
            assert score_func(
                [0.0, 1.0, 0.0], [42.0, 7.0, 42.0], average_method=mean
            ) == pytest.approx(1.0)
            assert score_func(
                [0.0, 1.0, 2.0], [42.0, 7.0, 2.0], average_method=mean
            ) == pytest.approx(1.0)
            assert score_func(
                [0, 1, 2], [42, 7, 2], average_method=mean
            ) == pytest.approx(1.0)


def test_homogeneous_but_not_complete_labeling():
    # homogeneous but not complete clustering
    h, c, v = homogeneity_completeness_v_measure([0, 0, 0, 1, 1, 1], [0, 0, 0, 1, 2, 2])
    assert_almost_equal(h, 1.00, 2)
    assert_almost_equal(c, 0.69, 2)
    assert_almost_equal(v, 0.81, 2)


def test_complete_but_not_homogeneous_labeling():
    # complete but not homogeneous clustering
    h, c, v = homogeneity_completeness_v_measure([0, 0, 1, 1, 2, 2], [0, 0, 1, 1, 1, 1])
    assert_almost_equal(h, 0.58, 2)
    assert_almost_equal(c, 1.00, 2)
    assert_almost_equal(v, 0.73, 2)


def test_not_complete_and_not_homogeneous_labeling():
    # neither complete nor homogeneous but not so bad either
    h, c, v = homogeneity_completeness_v_measure([0, 0, 0, 1, 1, 1], [0, 1, 0, 1, 2, 2])
    assert_almost_equal(h, 0.67, 2)
    assert_almost_equal(c, 0.42, 2)
    assert_almost_equal(v, 0.52, 2)
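

# For the default beta=1.0, v is the harmonic mean of h and c:
# v = 2 * h * c / (h + c). Illustrative check with the rounded values above:
# 2 * 0.67 * 0.42 / (0.67 + 0.42) ~= 0.52, matching the asserted v.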


def test_beta_parameter():
    # Test the beta parameter passed to homogeneity_completeness_v_measure
    # and v_measure_score
    beta_test = 0.2
    h_test = 0.67
    c_test = 0.42
    v_test = (1 + beta_test) * h_test * c_test / (beta_test * h_test + c_test)

    h, c, v = homogeneity_completeness_v_measure(
        [0, 0, 0, 1, 1, 1], [0, 1, 0, 1, 2, 2], beta=beta_test
    )
    assert_almost_equal(h, h_test, 2)
    assert_almost_equal(c, c_test, 2)
    assert_almost_equal(v, v_test, 2)

    v = v_measure_score([0, 0, 0, 1, 1, 1], [0, 1, 0, 1, 2, 2], beta=beta_test)
    assert_almost_equal(v, v_test, 2)
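

# Worked arithmetic for the weighted V-measure above (illustrative, using the
# rounded h and c values): with beta = 0.2,
#     v = (1 + 0.2) * 0.67 * 0.42 / (0.2 * 0.67 + 0.42) ~= 0.338 / 0.554 ~= 0.61
# Since beta < 1 weights homogeneity more heavily and h > c here, v lands
# above the beta=1 (harmonic mean) value of ~0.52.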


def test_non_consecutive_labels():
    # regression tests for labels with gaps
    h, c, v = homogeneity_completeness_v_measure([0, 0, 0, 2, 2, 2], [0, 1, 0, 1, 2, 2])
    assert_almost_equal(h, 0.67, 2)
    assert_almost_equal(c, 0.42, 2)
    assert_almost_equal(v, 0.52, 2)

    h, c, v = homogeneity_completeness_v_measure([0, 0, 0, 1, 1, 1], [0, 4, 0, 4, 2, 2])
    assert_almost_equal(h, 0.67, 2)
    assert_almost_equal(c, 0.42, 2)
    assert_almost_equal(v, 0.52, 2)

    ari_1 = adjusted_rand_score([0, 0, 0, 1, 1, 1], [0, 1, 0, 1, 2, 2])
    ari_2 = adjusted_rand_score([0, 0, 0, 1, 1, 1], [0, 4, 0, 4, 2, 2])
    assert_almost_equal(ari_1, 0.24, 2)
    assert_almost_equal(ari_2, 0.24, 2)

    ri_1 = rand_score([0, 0, 0, 1, 1, 1], [0, 1, 0, 1, 2, 2])
    ri_2 = rand_score([0, 0, 0, 1, 1, 1], [0, 4, 0, 4, 2, 2])
    assert_almost_equal(ri_1, 0.66, 2)
    assert_almost_equal(ri_2, 0.66, 2)


def uniform_labelings_scores(score_func, n_samples, k_range, n_runs=10, seed=42):
    # Compute score for random uniform cluster labelings
    random_labels = np.random.RandomState(seed).randint
    scores = np.zeros((len(k_range), n_runs))
    for i, k in enumerate(k_range):
        for j in range(n_runs):
            labels_a = random_labels(low=0, high=k, size=n_samples)
            labels_b = random_labels(low=0, high=k, size=n_samples)
            scores[i, j] = score_func(labels_a, labels_b)
    return scores


def test_adjustment_for_chance():
    # Check that adjusted scores are almost zero on random labels
    n_clusters_range = [2, 10, 50, 90]
    n_samples = 100
    n_runs = 10

    scores = uniform_labelings_scores(
        adjusted_rand_score, n_samples, n_clusters_range, n_runs
    )

    max_abs_scores = np.abs(scores).max(axis=1)
    assert_array_almost_equal(max_abs_scores, [0.02, 0.03, 0.03, 0.02], 2)


def test_adjusted_mutual_info_score():
    # Compute the Adjusted Mutual Information and test against known values
    labels_a = np.array([1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3])
    labels_b = np.array([1, 1, 1, 1, 2, 1, 2, 2, 2, 2, 3, 1, 3, 3, 3, 2, 2])
    # Mutual information
    mi = mutual_info_score(labels_a, labels_b)
    assert_almost_equal(mi, 0.41022, 5)
    # with provided sparse contingency
    C = contingency_matrix(labels_a, labels_b, sparse=True)
    mi = mutual_info_score(labels_a, labels_b, contingency=C)
    assert_almost_equal(mi, 0.41022, 5)
    # with provided dense contingency
    C = contingency_matrix(labels_a, labels_b)
    mi = mutual_info_score(labels_a, labels_b, contingency=C)
    assert_almost_equal(mi, 0.41022, 5)
    # Expected mutual information
    n_samples = C.sum()
    emi = expected_mutual_information(C, n_samples)
    assert_almost_equal(emi, 0.15042, 5)
    # Adjusted mutual information
    ami = adjusted_mutual_info_score(labels_a, labels_b)
    assert_almost_equal(ami, 0.27821, 5)
    ami = adjusted_mutual_info_score([1, 1, 2, 2], [2, 2, 3, 3])
    assert ami == pytest.approx(1.0)
    # Test with a very large array
    a110 = np.array([list(labels_a) * 110]).flatten()
    b110 = np.array([list(labels_b) * 110]).flatten()
    ami = adjusted_mutual_info_score(a110, b110)
    assert_almost_equal(ami, 0.38, 2)
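

# The AMI asserted above follows from its definition (a sketch, using the
# default "arithmetic" average_method and the mi/emi values computed in the
# test body):
#     AMI = (mi - emi) / (mean(entropy(labels_a), entropy(labels_b)) - emi)
#         ~= (0.41022 - 0.15042) / ((1.0951 + 1.0734) / 2 - 0.15042)
#         ~= 0.27821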


def test_expected_mutual_info_overflow():
    # Non-regression test for the case where a contingency cell exceeds 2**16,
    # which previously overflowed np.outer and produced an EMI > 1
    assert expected_mutual_information(np.array([[70000]]), 70000) <= 1


def test_int_overflow_mutual_info_fowlkes_mallows_score():
    # Test overflow in mutual_info_score and fowlkes_mallows_score
    x = np.array(
        [1] * (52632 + 2529)
        + [2] * (14660 + 793)
        + [3] * (3271 + 204)
        + [4] * (814 + 39)
        + [5] * (316 + 20)
    )
    y = np.array(
        [0] * 52632
        + [1] * 2529
        + [0] * 14660
        + [1] * 793
        + [0] * 3271
        + [1] * 204
        + [0] * 814
        + [1] * 39
        + [0] * 316
        + [1] * 20
    )

    assert_all_finite(mutual_info_score(x, y))
    assert_all_finite(fowlkes_mallows_score(x, y))


def test_entropy():
    ent = entropy([0, 0, 42.0])
    assert_almost_equal(ent, 0.6365141, 5)
    assert_almost_equal(entropy([]), 1)
    assert entropy([1, 1, 1, 1]) == 0
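

# A self-contained reference for entropy() on non-empty input, assuming the
# standard Shannon entropy with natural log over the empirical class
# frequencies: H = -sum(p_i * log(p_i)). This hypothetical helper is a sketch,
# not the library implementation; note that entropy([]) returning 1 above is
# a library convention the sketch does not reproduce.
def _entropy_reference(labels):
    _, counts = np.unique(labels, return_counts=True)
    p = counts / counts.sum()  # empirical class probabilities
    return -np.sum(p * np.log(p))


# e.g. _entropy_reference([0, 0, 42.0]) = -(2/3)*ln(2/3) - (1/3)*ln(1/3)
# ~= 0.6365, in line with the first assertion above.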


def test_contingency_matrix():
    labels_a = np.array([1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3])
    labels_b = np.array([1, 1, 1, 1, 2, 1, 2, 2, 2, 2, 3, 1, 3, 3, 3, 2, 2])
    C = contingency_matrix(labels_a, labels_b)
    C2 = np.histogram2d(labels_a, labels_b, bins=(np.arange(1, 5), np.arange(1, 5)))[0]
    assert_array_almost_equal(C, C2)
    C = contingency_matrix(labels_a, labels_b, eps=0.1)
    assert_array_almost_equal(C, C2 + 0.1)


def test_contingency_matrix_sparse():
    labels_a = np.array([1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3])
    labels_b = np.array([1, 1, 1, 1, 2, 1, 2, 2, 2, 2, 3, 1, 3, 3, 3, 2, 2])
    C = contingency_matrix(labels_a, labels_b)
    C_sparse = contingency_matrix(labels_a, labels_b, sparse=True).toarray()
    assert_array_almost_equal(C, C_sparse)
    with pytest.raises(ValueError, match="Cannot set 'eps' when sparse=True"):
        contingency_matrix(labels_a, labels_b, eps=1e-10, sparse=True)


def test_exactly_zero_info_score():
    # Check numerical stability when information is exactly zero
    for i in np.logspace(1, 4, 4).astype(int):
        labels_a, labels_b = (np.ones(i, dtype=int), np.arange(i, dtype=int))
        assert normalized_mutual_info_score(labels_a, labels_b) == pytest.approx(0.0)
        assert v_measure_score(labels_a, labels_b) == pytest.approx(0.0)
        assert adjusted_mutual_info_score(labels_a, labels_b) == pytest.approx(0.0)
        for method in ["min", "geometric", "arithmetic", "max"]:
            assert adjusted_mutual_info_score(
                labels_a, labels_b, average_method=method
            ) == pytest.approx(0.0)
            assert normalized_mutual_info_score(
                labels_a, labels_b, average_method=method
            ) == pytest.approx(0.0)


def test_v_measure_and_mutual_information(seed=36):
    # Check relation between v_measure, entropy and mutual information
    for i in np.logspace(1, 4, 4).astype(int):
        random_state = np.random.RandomState(seed)
        labels_a, labels_b = (
            random_state.randint(0, 10, i),
            random_state.randint(0, 10, i),
        )
        assert_almost_equal(
            v_measure_score(labels_a, labels_b),
            2.0
            * mutual_info_score(labels_a, labels_b)
            / (entropy(labels_a) + entropy(labels_b)),
            0,
        )
        avg = "arithmetic"
        assert_almost_equal(
            v_measure_score(labels_a, labels_b),
            normalized_mutual_info_score(labels_a, labels_b, average_method=avg),
        )


def test_fowlkes_mallows_score():
    # General case
    score = fowlkes_mallows_score([0, 0, 0, 1, 1, 1], [0, 0, 1, 1, 2, 2])
    assert_almost_equal(score, 4.0 / np.sqrt(12.0 * 6.0))

    # Perfect match but where the label names changed
    perfect_score = fowlkes_mallows_score([0, 0, 0, 1, 1, 1], [1, 1, 1, 0, 0, 0])
    assert_almost_equal(perfect_score, 1.0)

    # Worst case
    worst_score = fowlkes_mallows_score([0, 0, 0, 0, 0, 0], [0, 1, 2, 3, 4, 5])
    assert_almost_equal(worst_score, 0.0)


def test_fowlkes_mallows_score_properties():
    # handcrafted example
    labels_a = np.array([0, 0, 0, 1, 1, 2])
    labels_b = np.array([1, 1, 2, 2, 0, 0])
    expected = 1.0 / np.sqrt((1.0 + 3.0) * (1.0 + 2.0))
    # FMI = TP / sqrt((TP + FP) * (TP + FN))

    score_original = fowlkes_mallows_score(labels_a, labels_b)
    assert_almost_equal(score_original, expected)

    # symmetric property
    score_symmetric = fowlkes_mallows_score(labels_b, labels_a)
    assert_almost_equal(score_symmetric, expected)

    # permutation property
    score_permuted = fowlkes_mallows_score((labels_a + 1) % 3, labels_b)
    assert_almost_equal(score_permuted, expected)

    # symmetric and permutation (both together)
    score_both = fowlkes_mallows_score(labels_b, (labels_a + 2) % 3)
    assert_almost_equal(score_both, expected)
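

# FMI = TP / sqrt((TP + FP) * (TP + FN)) can also be read off the pair
# confusion matrix. A sketch with a hypothetical helper; the ordered-pair
# double counting cancels inside the ratio, and the degenerate TP == 0 case
# (the "worst case" test above asserts 0.0) would need an explicit guard that
# this sketch omits.
def _fmi_from_pair_confusion(labels_true, labels_pred):
    C = pair_confusion_matrix(labels_true, labels_pred)
    tp, fp, fn = C[1, 1], C[1, 0], C[0, 1]  # ordered same-same / mismatch counts
    return tp / np.sqrt((tp + fp) * (tp + fn))


# e.g. on the handcrafted example above this reproduces 1 / sqrt(4 * 3):
#     _fmi_from_pair_confusion(labels_a, labels_b)  # ~0.2887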


@pytest.mark.parametrize(
    "labels_true, labels_pred",
    [
        (["a"] * 6, [1, 1, 0, 0, 1, 1]),
        ([1] * 6, [1, 1, 0, 0, 1, 1]),
        ([1, 1, 0, 0, 1, 1], ["a"] * 6),
        ([1, 1, 0, 0, 1, 1], [1] * 6),
        (["a"] * 6, ["a"] * 6),
    ],
)
def test_mutual_info_score_positive_constant_label(labels_true, labels_pred):
    # Check that MI = 0 when one or both labellings are constant
    # non-regression test for #16355
    assert mutual_info_score(labels_true, labels_pred) == 0


def test_check_clustering_error():
    # Test warning message for continuous values
    rng = np.random.RandomState(42)
    noise = rng.rand(500)
    wavelength = np.linspace(0.01, 1, 500) * 1e-6
    msg = (
        "Clustering metrics expects discrete values but received "
        "continuous values for label, and continuous values for "
        "target"
    )

    with pytest.warns(UserWarning, match=msg):
        check_clusterings(wavelength, noise)


def test_pair_confusion_matrix_fully_dispersed():
    # edge case: every element is its own cluster
    N = 100
    clustering1 = list(range(N))
    clustering2 = clustering1
    expected = np.array([[N * (N - 1), 0], [0, 0]])
    assert_array_equal(pair_confusion_matrix(clustering1, clustering2), expected)


def test_pair_confusion_matrix_single_cluster():
    # edge case: only one cluster
    N = 100
    clustering1 = np.zeros((N,))
    clustering2 = clustering1
    expected = np.array([[0, 0], [0, N * (N - 1)]])
    assert_array_equal(pair_confusion_matrix(clustering1, clustering2), expected)


def test_pair_confusion_matrix():
    # regular case: different non-trivial clusterings
    n = 10
    N = n**2
    clustering1 = np.hstack([[i + 1] * n for i in range(n)])
    clustering2 = np.hstack([[i + 1] * (n + 1) for i in range(n)])[:N]
    # basic quadratic implementation
    expected = np.zeros(shape=(2, 2), dtype=np.int64)
    for i in range(len(clustering1)):
        for j in range(len(clustering2)):
            if i != j:
                same_cluster_1 = int(clustering1[i] == clustering1[j])
                same_cluster_2 = int(clustering2[i] == clustering2[j])
                expected[same_cluster_1, same_cluster_2] += 1
    assert_array_equal(pair_confusion_matrix(clustering1, clustering2), expected)


@pytest.mark.parametrize(
    "clustering1, clustering2",
    [(list(range(100)), list(range(100))), (np.zeros((100,)), np.zeros((100,)))],
)
def test_rand_score_edge_cases(clustering1, clustering2):
    # edge case 1: every element is its own cluster
    # edge case 2: only one cluster
    assert_allclose(rand_score(clustering1, clustering2), 1.0)


def test_rand_score():
    # regular case: different non-trivial clusterings
    clustering1 = [0, 0, 0, 1, 1, 1]
    clustering2 = [0, 1, 0, 1, 2, 2]
    # pair confusion matrix (sample positions below are 1-indexed)
    D11 = 2 * 2  # ordered pairs: (1, 3), (5, 6)
    D10 = 2 * 4  # ordered pairs: (1, 2), (2, 3), (4, 5), (4, 6)
    D01 = 2 * 1  # ordered pair: (2, 4)
    D00 = 5 * 6 - D11 - D01 - D10  # the remaining pairs
    # rand score
    expected_numerator = D00 + D11
    expected_denominator = D00 + D01 + D10 + D11
    expected = expected_numerator / expected_denominator
    assert_allclose(rand_score(clustering1, clustering2), expected)
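

# Equivalently, the Rand index is pairwise accuracy and can be read off the
# pair confusion matrix directly; the ordered-pair double counting cancels in
# the ratio (a restatement of the arithmetic above, not an extra API):
#     C = pair_confusion_matrix(clustering1, clustering2)
#     assert_allclose((C[0, 0] + C[1, 1]) / C.sum(), expected)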


def test_adjusted_rand_score_overflow():
    """Check that large amount of data will not lead to overflow in
    `adjusted_rand_score`.
    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/issues/20305
    """
    rng = np.random.RandomState(0)
    y_true = rng.randint(0, 2, 100_000, dtype=np.int8)
    y_pred = rng.randint(0, 2, 100_000, dtype=np.int8)
    with warnings.catch_warnings():
        warnings.simplefilter("error", RuntimeWarning)
        adjusted_rand_score(y_true, y_pred)


@pytest.mark.parametrize("average_method", ["min", "arithmetic", "geometric", "max"])
def test_normalized_mutual_info_score_bounded(average_method):
    """Check that nmi returns a score between 0 (included) and 1 (excluded
    for non-perfect match)

    Non-regression test for issue #13836
    """
    labels1 = [0] * 469
    labels2 = [1] + labels1[1:]
    labels3 = [0, 1] + labels1[2:]

    # labels1 is constant. The mutual info between labels1 and any other
    # labelling is 0.
    nmi = normalized_mutual_info_score(labels1, labels2, average_method=average_method)
    assert nmi == 0

    # non constant, non perfect matching labels
    nmi = normalized_mutual_info_score(labels2, labels3, average_method=average_method)
    assert 0 <= nmi < 1