File: test_plotting.py (scikit-learn 1.7.2+dfsg-4)

import numpy as np
import pytest

from sklearn.linear_model import LogisticRegression
from sklearn.utils._plotting import (
    _BinaryClassifierCurveDisplayMixin,
    _deprecate_estimator_name,
    _despine,
    _interval_max_min_ratio,
    _validate_score_name,
    _validate_style_kwargs,
)
from sklearn.utils._response import _get_response_values_binary
from sklearn.utils._testing import assert_allclose


@pytest.mark.parametrize("ax", [None, "Ax"])
@pytest.mark.parametrize(
    "name, expected_name_out", [(None, "TestEstimator"), ("CustomName", "CustomName")]
)
def test_validate_plot_params(pyplot, ax, name, expected_name_out):
    """Check `_validate_plot_params` returns the correct values."""
    display = _BinaryClassifierCurveDisplayMixin()
    display.estimator_name = "TestEstimator"
    if ax:
        _, ax = pyplot.subplots()
    ax_out, _, name_out = display._validate_plot_params(ax=ax, name=name)

    assert name_out == expected_name_out

    if ax:
        assert ax == ax_out


@pytest.mark.parametrize("pos_label", [None, 0])
@pytest.mark.parametrize("name", [None, "CustomName"])
@pytest.mark.parametrize(
    "response_method", ["auto", "predict_proba", "decision_function"]
)
def test_validate_and_get_response_values(pyplot, pos_label, name, response_method):
    """Check `_validate_and_get_response_values` returns the correct values."""
    X = np.array([[0, 0], [1, 1], [2, 2], [3, 3]])
    y = np.array([0, 0, 2, 2])
    estimator = LogisticRegression().fit(X, y)

    y_pred, pos_label, name_out = (
        _BinaryClassifierCurveDisplayMixin._validate_and_get_response_values(
            estimator,
            X,
            y,
            response_method=response_method,
            pos_label=pos_label,
            name=name,
        )
    )

    expected_y_pred, expected_pos_label = _get_response_values_binary(
        estimator, X, response_method=response_method, pos_label=pos_label
    )

    assert_allclose(y_pred, expected_y_pred)
    assert pos_label == expected_pos_label

    # Check name is handled correctly
    expected_name = name if name is not None else "LogisticRegression"
    assert name_out == expected_name


@pytest.mark.parametrize(
    "y_true, error_message",
    [
        # Multiclass target
        (np.array([0, 1, 2]), "The target y is not binary."),
        # Length mismatch between `y_true` and `y_pred`
        (np.array([0, 1]), "Found input variables with inconsistent"),
        # `pos_label` must be specified when classes are not {0, 1} or {-1, 1}
        (np.array([0, 2, 0, 2]), r"y_true takes value in \{0, 2\} and pos_label"),
    ],
)
def test_validate_from_predictions_params_errors(pyplot, y_true, error_message):
    """Check `_validate_from_predictions_params` raises the correct errors."""
    y_pred = np.array([0.1, 0.2, 0.3, 0.4])
    sample_weight = np.ones(4)

    with pytest.raises(ValueError, match=error_message):
        _BinaryClassifierCurveDisplayMixin._validate_from_predictions_params(
            y_true=y_true,
            y_pred=y_pred,
            sample_weight=sample_weight,
            pos_label=None,
        )


@pytest.mark.parametrize("name", [None, "CustomName"])
@pytest.mark.parametrize(
    "pos_label, y_true",
    [
        (None, np.array([0, 1, 0, 1])),
        (2, np.array([0, 2, 0, 2])),
    ],
)
def test_validate_from_predictions_params_returns(pyplot, name, pos_label, y_true):
    """Check `_validate_from_predictions_params` returns the correct values."""
    y_pred = np.array([0.1, 0.2, 0.3, 0.4])
    pos_label_out, name_out = (
        _BinaryClassifierCurveDisplayMixin._validate_from_predictions_params(
            y_true=y_true,
            y_pred=y_pred,
            sample_weight=None,
            pos_label=pos_label,
            name=name,
        )
    )

    # Check name is handled correctly
    expected_name = name if name is not None else "Classifier"
    assert name_out == expected_name

    # Check pos_label is handled correctly
    expected_pos_label = pos_label if pos_label is not None else 1
    assert pos_label_out == expected_pos_label


@pytest.mark.parametrize(
    "params, err_msg",
    [
        (
            {
                # Missing "indices" key
                "cv_results": {"estimator": "dummy"},
                "X": np.array([[1, 2], [3, 4]]),
                "y": np.array([0, 1]),
                "sample_weight": None,
                "pos_label": None,
            },
            "`cv_results` does not contain one of the following",
        ),
        (
            {
                "cv_results": {
                    "estimator": "dummy",
                    "indices": {"test": [[1, 2], [1, 2]], "train": [[3, 4], [3, 4]]},
                },
                # `X` wrong length
                "X": np.array([[1, 2]]),
                "y": np.array([0, 1]),
                "sample_weight": None,
                "pos_label": None,
            },
            "`X` does not contain the correct number of",
        ),
        (
            {
                "cv_results": {
                    "estimator": "dummy",
                    "indices": {"test": [[1, 2], [1, 2]], "train": [[3, 4], [3, 4]]},
                },
                "X": np.array([1, 2, 3, 4]),
                # `y` not binary
                "y": np.array([0, 2, 1, 3]),
                "sample_weight": None,
                "pos_label": None,
            },
            "The target `y` is not binary",
        ),
        (
            {
                "cv_results": {
                    "estimator": "dummy",
                    "indices": {"test": [[1, 2], [1, 2]], "train": [[3, 4], [3, 4]]},
                },
                "X": np.array([1, 2, 3, 4]),
                "y": np.array([0, 1, 0, 1]),
                # `sample_weight` wrong length
                "sample_weight": np.array([0.5]),
                "pos_label": None,
            },
            "Found input variables with inconsistent",
        ),
        (
            {
                "cv_results": {
                    "estimator": "dummy",
                    "indices": {"test": [[1, 2], [1, 2]], "train": [[3, 4], [3, 4]]},
                },
                "X": np.array([1, 2, 3, 4]),
                "y": np.array([2, 3, 2, 3]),
                "sample_weight": None,
                # Not specified when `y` not in {0, 1} or {-1, 1}
                "pos_label": None,
            },
            "y takes value in {2, 3} and pos_label is not specified",
        ),
    ],
)
def test_validate_from_cv_results_params(pyplot, params, err_msg):
    """Check parameter validation is performed correctly."""
    with pytest.raises(ValueError, match=err_msg):
        _BinaryClassifierCurveDisplayMixin()._validate_from_cv_results_params(**params)


@pytest.mark.parametrize(
    "curve_legend_metric, curve_name, expected_label",
    [
        (0.85, None, "AUC = 0.85"),
        (None, "Model A", "Model A"),
        (0.95, "Random Forest", "Random Forest (AUC = 0.95)"),
        (None, None, None),
    ],
)
def test_get_legend_label(curve_legend_metric, curve_name, expected_label):
    """Check `_get_legend_label` returns the correct label."""
    legend_metric_name = "AUC"
    label = _BinaryClassifierCurveDisplayMixin._get_legend_label(
        curve_legend_metric, curve_name, legend_metric_name
    )
    assert label == expected_label


# TODO(1.9): Remove
@pytest.mark.parametrize("curve_kwargs", [{"alpha": 1.0}, None])
@pytest.mark.parametrize("kwargs", [{}, {"alpha": 1.0}])
def test_validate_curve_kwargs_deprecate_kwargs(curve_kwargs, kwargs):
    """Check `_validate_curve_kwargs` deprecates kwargs correctly."""
    n_curves = 1
    name = None
    legend_metric = {"mean": 0.8, "std": 0.1}
    legend_metric_name = "AUC"

    if curve_kwargs and kwargs:
        with pytest.raises(ValueError, match="Cannot provide both `curve_kwargs`"):
            _BinaryClassifierCurveDisplayMixin._validate_curve_kwargs(
                n_curves,
                name,
                legend_metric,
                legend_metric_name,
                curve_kwargs,
                **kwargs,
            )
    elif kwargs:
        with pytest.warns(FutureWarning, match=r"`\*\*kwargs` is deprecated and"):
            _BinaryClassifierCurveDisplayMixin._validate_curve_kwargs(
                n_curves,
                name,
                legend_metric,
                legend_metric_name,
                curve_kwargs,
                **kwargs,
            )
    else:
        # No warning or error should be raised
        _BinaryClassifierCurveDisplayMixin._validate_curve_kwargs(
            n_curves, name, legend_metric, legend_metric_name, curve_kwargs, **kwargs
        )


def test_validate_curve_kwargs_error():
    """Check `_validate_curve_kwargs` performs parameter validation correctly."""
    n_curves = 3
    legend_metric = {"mean": 0.8, "std": 0.1}
    legend_metric_name = "AUC"
    with pytest.raises(ValueError, match="`curve_kwargs` must be None"):
        _BinaryClassifierCurveDisplayMixin._validate_curve_kwargs(
            n_curves=n_curves,
            name=None,
            legend_metric=legend_metric,
            legend_metric_name=legend_metric_name,
            curve_kwargs=[{"alpha": 1.0}],
        )
    with pytest.raises(ValueError, match="To avoid labeling individual curves"):
        name = ["one", "two", "three"]
        _BinaryClassifierCurveDisplayMixin._validate_curve_kwargs(
            n_curves=n_curves,
            name=name,
            legend_metric=legend_metric,
            legend_metric_name=legend_metric_name,
            curve_kwargs=None,
        )
        _BinaryClassifierCurveDisplayMixin._validate_curve_kwargs(
            n_curves=n_curves,
            name=name,
            legend_metric=legend_metric,
            legend_metric_name=legend_metric_name,
            curve_kwargs={"alpha": 1.0},
        )


@pytest.mark.parametrize("name", [None, "curve_name", ["curve_name"]])
@pytest.mark.parametrize(
    "legend_metric",
    [
        {"mean": 0.8, "std": 0.2},
        {"mean": None, "std": None},
    ],
)
@pytest.mark.parametrize("legend_metric_name", ["AUC", "AP"])
@pytest.mark.parametrize(
    "curve_kwargs",
    [
        None,
        {"color": "red"},
    ],
)
def test_validate_curve_kwargs_single_legend(
    name, legend_metric, legend_metric_name, curve_kwargs
):
    """Check `_validate_curve_kwargs` returns correct kwargs for single legend entry."""
    n_curves = 3
    curve_kwargs_out = _BinaryClassifierCurveDisplayMixin._validate_curve_kwargs(
        n_curves=n_curves,
        name=name,
        legend_metric=legend_metric,
        legend_metric_name=legend_metric_name,
        curve_kwargs=curve_kwargs,
    )

    assert isinstance(curve_kwargs_out, list)
    assert len(curve_kwargs_out) == n_curves

    expected_label = None
    if isinstance(name, list):
        name = name[0]
    if name is not None:
        expected_label = name
        if legend_metric["mean"] is not None:
            expected_label = expected_label + f" ({legend_metric_name} = 0.80 +/- 0.20)"
    # `name` is None
    elif legend_metric["mean"] is not None:
        expected_label = f"{legend_metric_name} = 0.80 +/- 0.20"

    assert curve_kwargs_out[0]["label"] == expected_label
    # All remaining curves should have None as "label"
    assert curve_kwargs_out[1]["label"] is None
    assert curve_kwargs_out[2]["label"] is None

    # Default multi-curve kwargs
    if curve_kwargs is None:
        assert all(len(kwargs) == 4 for kwargs in curve_kwargs_out)
        assert all(kwargs["alpha"] == 0.5 for kwargs in curve_kwargs_out)
        assert all(kwargs["linestyle"] == "--" for kwargs in curve_kwargs_out)
        assert all(kwargs["color"] == "blue" for kwargs in curve_kwargs_out)
    else:
        assert all(len(kwargs) == 2 for kwargs in curve_kwargs_out)
        assert all(kwargs["color"] == "red" for kwargs in curve_kwargs_out)


@pytest.mark.parametrize("name", [None, "curve_name", ["one", "two", "three"]])
@pytest.mark.parametrize(
    "legend_metric", [{"metric": [1.0, 1.0, 1.0]}, {"metric": [None, None, None]}]
)
@pytest.mark.parametrize("legend_metric_name", ["AUC", "AP"])
def test_validate_curve_kwargs_multi_legend(name, legend_metric, legend_metric_name):
    """Check `_validate_curve_kwargs` returns correct kwargs for multi legend entry."""
    n_curves = 3
    curve_kwargs = [{"color": "red"}, {"color": "yellow"}, {"color": "blue"}]
    curve_kwargs_out = _BinaryClassifierCurveDisplayMixin._validate_curve_kwargs(
        n_curves=n_curves,
        name=name,
        legend_metric=legend_metric,
        legend_metric_name=legend_metric_name,
        curve_kwargs=curve_kwargs,
    )

    assert isinstance(curve_kwargs_out, list)
    assert len(curve_kwargs_out) == n_curves

    expected_labels = [None, None, None]
    if isinstance(name, str):
        label = name
        if legend_metric["metric"][0] is not None:
            label += f" ({legend_metric_name} = 1.00)"
        expected_labels = [label] * n_curves
    elif isinstance(name, list) and legend_metric["metric"][0] is None:
        expected_labels = name
    elif isinstance(name, list) and legend_metric["metric"][0] is not None:
        expected_labels = [
            f"{name_single} ({legend_metric_name} = 1.00)" for name_single in name
        ]
    # `name` is None
    elif legend_metric["metric"][0] is not None:
        expected_labels = [f"{legend_metric_name} = 1.00"] * n_curves

    for idx, expected_label in enumerate(expected_labels):
        assert curve_kwargs_out[idx]["label"] == expected_label

    assert all(len(kwargs) == 2 for kwargs in curve_kwargs_out)
    for curve_kwarg, curve_kwarg_out in zip(curve_kwargs, curve_kwargs_out):
        assert curve_kwarg_out["color"] == curve_kwarg["color"]


# Dummy scoring callables: only their names matter, since
# `_validate_score_name` derives a default display name from the callable's
# name when `score_name` is None.
def metric():
    pass  # pragma: no cover


def neg_metric():
    pass  # pragma: no cover


@pytest.mark.parametrize(
    "score_name, scoring, negate_score, expected_score_name",
    [
        ("accuracy", None, False, "accuracy"),  # do not transform the name
        (None, "accuracy", False, "Accuracy"),  # capitalize the name
        (None, "accuracy", True, "Negative accuracy"),  # add "Negative"
        (None, "neg_mean_absolute_error", False, "Negative mean absolute error"),
        (None, "neg_mean_absolute_error", True, "Mean absolute error"),  # remove "neg_"
        ("MAE", "neg_mean_absolute_error", True, "MAE"),  # keep score_name
        (None, None, False, "Score"),  # default name
        (None, None, True, "Negative score"),  # default name but negated
        ("Some metric", metric, False, "Some metric"),  # do not transform the name
        ("Some metric", metric, True, "Some metric"),  # do not transform the name
        (None, metric, False, "Metric"),  # default name
        (None, metric, True, "Negative metric"),  # default name but negated
        ("Some metric", neg_metric, False, "Some metric"),  # do not transform the name
        ("Some metric", neg_metric, True, "Some metric"),  # do not transform the name
        (None, neg_metric, False, "Negative metric"),  # default name
        (None, neg_metric, True, "Metric"),  # default name but negated
    ],
)
def test_validate_score_name(score_name, scoring, negate_score, expected_score_name):
    """Check that we return the right score name."""
    assert (
        _validate_score_name(score_name, scoring, negate_score) == expected_score_name
    )


# The following test checks the max-to-min ratio of parameter value
# intervals, verifying that a decision threshold of 5 is a sound heuristic
# for choosing between linear and log scales on common ranges of parameter
# values.
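# For instance, `np.geomspace(0.1, 1, 5)` has consecutive gaps of roughly
# 0.078, 0.138, 0.246 and 0.438, i.e. a max-to-min ratio of ~5.6 (log-like
# spacing), whereas evenly spaced values always give a ratio of 1.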
@pytest.mark.parametrize(
    "data, lower_bound, upper_bound",
    [
        # Such a range could be clearly displayed with either log scale or linear
        # scale.
        (np.geomspace(0.1, 1, 5), 5, 6),
        # Checking that the ratio is still positive on a negative log scale.
        (-np.geomspace(0.1, 1, 10), 7, 8),
        # Evenly spaced parameter values lead to a ratio of 1.
        (np.linspace(0, 1, 5), 0.9, 1.1),
        # This is not exactly spaced on a log scale but we will benefit from treating
        # it as such for visualization.
        ([1, 2, 5, 10, 20, 50], 20, 40),
    ],
)
def test_interval_max_min_ratio(data, lower_bound, upper_bound):
    assert lower_bound < _interval_max_min_ratio(data) < upper_bound


@pytest.mark.parametrize(
    "default_kwargs, user_kwargs, expected",
    [
        (
            {"color": "blue", "linewidth": 2},
            {"linestyle": "dashed"},
            {"color": "blue", "linewidth": 2, "linestyle": "dashed"},
        ),
        (
            {"color": "blue", "linestyle": "solid"},
            {"c": "red", "ls": "dashed"},
            {"color": "red", "linestyle": "dashed"},
        ),
        (
            {"label": "xxx", "color": "k", "linestyle": "--"},
            {"ls": "-."},
            {"label": "xxx", "color": "k", "linestyle": "-."},
        ),
        ({}, {}, {}),
        (
            {},
            {
                "ls": "dashed",
                "c": "red",
                "ec": "black",
                "fc": "yellow",
                "lw": 2,
                "mec": "green",
                "mfcalt": "blue",
                "ms": 5,
            },
            {
                "linestyle": "dashed",
                "color": "red",
                "edgecolor": "black",
                "facecolor": "yellow",
                "linewidth": 2,
                "markeredgecolor": "green",
                "markerfacecoloralt": "blue",
                "markersize": 5,
            },
        ),
    ],
)
def test_validate_style_kwargs(default_kwargs, user_kwargs, expected):
    """Check the behaviour of `validate_style_kwargs` with various type of entries."""
    result = _validate_style_kwargs(default_kwargs, user_kwargs)
    assert result == expected, (
        "The validation of style keywords does not provide the expected results: "
        f"Got {result} instead of {expected}."
    )


@pytest.mark.parametrize(
    "default_kwargs, user_kwargs",
    [({}, {"ls": 2, "linestyle": 3}), ({}, {"c": "r", "color": "blue"})],
)
def test_validate_style_kwargs_error(default_kwargs, user_kwargs):
    """Check that `validate_style_kwargs` raises TypeError"""
    with pytest.raises(TypeError):
        _validate_style_kwargs(default_kwargs, user_kwargs)


def test_despine(pyplot):
    ax = pyplot.gca()
    _despine(ax)
    assert ax.spines["top"].get_visible() is False
    assert ax.spines["right"].get_visible() is False
    assert ax.spines["bottom"].get_bounds() == (0, 1)
    assert ax.spines["left"].get_bounds() == (0, 1)


@pytest.mark.parametrize("estimator_name", ["my_est_name", "deprecated"])
@pytest.mark.parametrize("name", [None, "my_name"])
def test_deprecate_estimator_name(estimator_name, name):
    """Check `_deprecate_estimator_name` behaves correctly"""
    version = "1.7"
    version_remove = "1.9"

    if estimator_name == "deprecated":
        name_out = _deprecate_estimator_name(estimator_name, name, version)
        assert name_out == name
    # `estimator_name` is provided and `name` is:
    elif name is None:
        warning_message = (
            f"`estimator_name` is deprecated in {version} and will be removed in "
            f"{version_remove}. Use `name` instead."
        )
        with pytest.warns(FutureWarning, match=warning_message):
            result = _deprecate_estimator_name(estimator_name, name, version)
        assert result == estimator_name
    elif name is not None:
        error_message = (
            f"Cannot provide both `estimator_name` and `name`. `estimator_name` "
            f"is deprecated in {version} and will be removed in {version_remove}. "
        )
        with pytest.raises(ValueError, match=error_message):
            _deprecate_estimator_name(estimator_name, name, version)