File: test_predict_error_display.py

package info (click to toggle)
scikit-learn 1.4.2%2Bdfsg-8
links: PTS, VCS
area: main
in suites: trixie
size: 25,036 kB
sloc: python: 201,105; cpp: 5,790; ansic: 854; makefile: 304; sh: 56; javascript: 20
file content (161 lines) | stat: -rw-r--r-- 5,786 bytes
import pytest
from numpy.testing import assert_allclose

from sklearn.datasets import load_diabetes
from sklearn.exceptions import NotFittedError
from sklearn.linear_model import Ridge
from sklearn.metrics import PredictionErrorDisplay

X, y = load_diabetes(return_X_y=True)


@pytest.fixture
def regressor_fitted():
    return Ridge().fit(X, y)


@pytest.mark.parametrize(
    "regressor, params, err_type, err_msg",
    [
        (
            Ridge().fit(X, y),
            {"subsample": -1},
            ValueError,
            "When an integer, subsample=-1 should be",
        ),
        (
            Ridge().fit(X, y),
            {"subsample": 20.0},
            ValueError,
            "When a floating-point, subsample=20.0 should be",
        ),
        (
            Ridge().fit(X, y),
            {"subsample": -20.0},
            ValueError,
            "When a floating-point, subsample=-20.0 should be",
        ),
        (
            Ridge().fit(X, y),
            {"kind": "xxx"},
            ValueError,
            "`kind` must be one of",
        ),
    ],
)
@pytest.mark.parametrize("class_method", ["from_estimator", "from_predictions"])
def test_prediction_error_display_raise_error(
    pyplot, class_method, regressor, params, err_type, err_msg
):
    """Check that we raise the proper error when making the parameters
    # validation."""
    with pytest.raises(err_type, match=err_msg):
        if class_method == "from_estimator":
            PredictionErrorDisplay.from_estimator(regressor, X, y, **params)
        else:
            y_pred = regressor.predict(X)
            PredictionErrorDisplay.from_predictions(y_true=y, y_pred=y_pred, **params)


def test_from_estimator_not_fitted(pyplot):
    """Check that we raise a `NotFittedError` when the passed regressor is not
    fit."""
    regressor = Ridge()
    with pytest.raises(NotFittedError, match="is not fitted yet."):
        PredictionErrorDisplay.from_estimator(regressor, X, y)


@pytest.mark.parametrize("class_method", ["from_estimator", "from_predictions"])
@pytest.mark.parametrize("kind", ["actual_vs_predicted", "residual_vs_predicted"])
def test_prediction_error_display(pyplot, regressor_fitted, class_method, kind):
    """Check the default behaviour of the display."""
    if class_method == "from_estimator":
        display = PredictionErrorDisplay.from_estimator(
            regressor_fitted, X, y, kind=kind
        )
    else:
        y_pred = regressor_fitted.predict(X)
        display = PredictionErrorDisplay.from_predictions(
            y_true=y, y_pred=y_pred, kind=kind
        )

    if kind == "actual_vs_predicted":
        assert_allclose(display.line_.get_xdata(), display.line_.get_ydata())
        assert display.ax_.get_xlabel() == "Predicted values"
        assert display.ax_.get_ylabel() == "Actual values"
        assert display.line_ is not None
    else:
        assert display.ax_.get_xlabel() == "Predicted values"
        assert display.ax_.get_ylabel() == "Residuals (actual - predicted)"
        assert display.line_ is not None

    assert display.ax_.get_legend() is None


@pytest.mark.parametrize("class_method", ["from_estimator", "from_predictions"])
@pytest.mark.parametrize(
    "subsample, expected_size",
    [(5, 5), (0.1, int(X.shape[0] * 0.1)), (None, X.shape[0])],
)
def test_plot_prediction_error_subsample(
    pyplot, regressor_fitted, class_method, subsample, expected_size
):
    """Check the behaviour of `subsample`."""
    if class_method == "from_estimator":
        display = PredictionErrorDisplay.from_estimator(
            regressor_fitted, X, y, subsample=subsample
        )
    else:
        y_pred = regressor_fitted.predict(X)
        display = PredictionErrorDisplay.from_predictions(
            y_true=y, y_pred=y_pred, subsample=subsample
        )
    assert len(display.scatter_.get_offsets()) == expected_size


@pytest.mark.parametrize("class_method", ["from_estimator", "from_predictions"])
def test_plot_prediction_error_ax(pyplot, regressor_fitted, class_method):
    """Check that we can pass an axis to the display."""
    _, ax = pyplot.subplots()
    if class_method == "from_estimator":
        display = PredictionErrorDisplay.from_estimator(regressor_fitted, X, y, ax=ax)
    else:
        y_pred = regressor_fitted.predict(X)
        display = PredictionErrorDisplay.from_predictions(
            y_true=y, y_pred=y_pred, ax=ax
        )
    assert display.ax_ is ax


@pytest.mark.parametrize("class_method", ["from_estimator", "from_predictions"])
def test_prediction_error_custom_artist(pyplot, regressor_fitted, class_method):
    """Check that we can tune the style of the lines."""
    extra_params = {
        "kind": "actual_vs_predicted",
        "scatter_kwargs": {"color": "red"},
        "line_kwargs": {"color": "black"},
    }
    if class_method == "from_estimator":
        display = PredictionErrorDisplay.from_estimator(
            regressor_fitted, X, y, **extra_params
        )
    else:
        y_pred = regressor_fitted.predict(X)
        display = PredictionErrorDisplay.from_predictions(
            y_true=y, y_pred=y_pred, **extra_params
        )

    assert display.line_.get_color() == "black"
    assert_allclose(display.scatter_.get_edgecolor(), [[1.0, 0.0, 0.0, 0.8]])

    # create a display with the default values
    if class_method == "from_estimator":
        display = PredictionErrorDisplay.from_estimator(regressor_fitted, X, y)
    else:
        y_pred = regressor_fitted.predict(X)
        display = PredictionErrorDisplay.from_predictions(y_true=y, y_pred=y_pred)
    pyplot.close("all")

    display.plot(**extra_params)
    assert display.line_.get_color() == "black"
    assert_allclose(display.scatter_.get_edgecolor(), [[1.0, 0.0, 0.0, 0.8]])