File: test_score_objects.py

package info (click to toggle)
imbalanced-learn 0.12.4-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 2,160 kB
  • sloc: python: 17,221; sh: 481; makefile: 187; javascript: 50
file content (78 lines) | stat: -rw-r--r-- 2,091 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
"""Test for score"""
# Authors: Guillaume Lemaitre <g.lemaitre58@gmail.com>
#          Christos Aridas
# License: MIT

import pytest
from sklearn.datasets import make_blobs
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import make_scorer
from sklearn.model_selection import GridSearchCV, train_test_split

from imblearn.metrics import (
    geometric_mean_score,
    make_index_balanced_accuracy,
    sensitivity_score,
    specificity_score,
)

R_TOL = 1e-2


@pytest.fixture
def data():
    """Provide a two-class blob dataset already split into train/test parts."""
    features, labels = make_blobs(random_state=0, centers=2)
    # train_test_split returns [X_train, X_test, y_train, y_test]
    return train_test_split(features, labels, random_state=0)


@pytest.mark.parametrize(
    "score, expected_score",
    [
        (sensitivity_score, 0.90),
        (specificity_score, 0.90),
        (geometric_mean_score, 0.90),
        (make_index_balanced_accuracy()(geometric_mean_score), 0.82),
    ],
)
@pytest.mark.parametrize("average", ["macro", "weighted", "micro"])
def test_scorer_common_average(data, score, expected_score, average):
    """Check each imblearn scorer drives a grid search for every common
    `average` mode and reaches the expected best CV score."""
    X_train, X_test, y_train, _ = data

    search = GridSearchCV(
        LogisticRegression(),
        param_grid={"C": [1, 10]},
        # pos_label=None because averaging is over all classes here
        scoring=make_scorer(score, pos_label=None, average=average),
        cv=3,
    )
    # predict() is a smoke check that the fitted pipeline is usable
    search.fit(X_train, y_train).predict(X_test)

    assert search.best_score_ >= expected_score


@pytest.mark.parametrize(
    "score, average, expected_score",
    [
        (sensitivity_score, "binary", 0.94),
        (specificity_score, "binary", 0.89),
        (geometric_mean_score, "multiclass", 0.90),
        (
            make_index_balanced_accuracy()(geometric_mean_score),
            "multiclass",
            0.82,
        ),
    ],
)
def test_scorer_default_average(data, score, average, expected_score):
    """Check each imblearn scorer with its natural `average` mode and an
    explicit positive label inside a grid search."""
    X_train, X_test, y_train, _ = data

    search = GridSearchCV(
        LogisticRegression(),
        param_grid={"C": [1, 10]},
        # pos_label=1 pins the positive class for the binary metrics
        scoring=make_scorer(score, pos_label=1, average=average),
        cv=3,
    )
    # predict() is a smoke check that the fitted pipeline is usable
    search.fit(X_train, y_train).predict(X_test)

    assert search.best_score_ >= expected_score