1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152
|
import pytest
from sklearn.base import ClassifierMixin, clone
from sklearn.compose import make_column_transformer
from sklearn.datasets import load_iris
from sklearn.exceptions import NotFittedError
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import (
DetCurveDisplay,
PrecisionRecallDisplay,
RocCurveDisplay,
)
@pytest.fixture(scope="module")
def data():
    """Module-scoped fixture returning the iris dataset as an ``(X, y)`` pair."""
    X, y = load_iris(return_X_y=True)
    return X, y
@pytest.fixture(scope="module")
def data_binary(data):
    """Module-scoped fixture restricting `data` to the samples with ``y < 2``.

    Only the rows whose label is strictly below 2 are kept, and the same
    selection is applied to both the features and the labels.
    """
    features, labels = data
    # Build the boolean mask once and reuse it for both arrays.
    keep = labels < 2
    return features[keep], labels[keep]
@pytest.mark.parametrize(
    "Display", [DetCurveDisplay, PrecisionRecallDisplay, RocCurveDisplay]
)
def test_display_curve_error_non_binary(pyplot, data, Display):
    """Ensure the curve displays reject an estimator that is not binary.

    A decision tree is fitted on the full `data` fixture, then
    `Display.from_estimator` is expected to raise a `ValueError` whose
    message names the estimator class.
    """
    features, target = data
    tree = DecisionTreeClassifier().fit(features, target)

    expected_error = (
        "Expected 'estimator' to be a binary classifier, but got DecisionTreeClassifier"
    )
    with pytest.raises(ValueError, match=expected_error):
        Display.from_estimator(tree, features, target)
@pytest.mark.parametrize(
    "response_method, msg",
    [
        (
            "predict_proba",
            "response method predict_proba is not defined in MyClassifier",
        ),
        (
            "decision_function",
            "response method decision_function is not defined in MyClassifier",
        ),
        (
            "auto",
            "response method decision_function or predict_proba is not "
            "defined in MyClassifier",
        ),
        (
            "bad_method",
            "response_method must be 'predict_proba', 'decision_function' or 'auto'",
        ),
    ],
)
@pytest.mark.parametrize(
    "Display", [DetCurveDisplay, PrecisionRecallDisplay, RocCurveDisplay]
)
def test_display_curve_error_no_response(
    pyplot,
    data_binary,
    response_method,
    msg,
    Display,
):
    """Ensure a `ValueError` is raised for an unavailable response method.

    The classifier used here only implements `fit`, so none of the
    requested response methods exist on it. Each parametrized
    `response_method` must produce the matching error message `msg`.
    """

    class MyClassifier(ClassifierMixin):
        """Minimal classifier exposing `fit` only (no response method)."""

        def fit(self, X, y):
            # Only the fitted attribute is set; nothing is learned.
            self.classes_ = [0, 1]
            return self

    features, target = data_binary
    fitted = MyClassifier().fit(features, target)

    with pytest.raises(ValueError, match=msg):
        Display.from_estimator(
            fitted, features, target, response_method=response_method
        )
@pytest.mark.parametrize(
    "Display", [DetCurveDisplay, PrecisionRecallDisplay, RocCurveDisplay]
)
@pytest.mark.parametrize("constructor_name", ["from_estimator", "from_predictions"])
def test_display_curve_estimator_name_multiple_calls(
    pyplot,
    data_binary,
    Display,
    constructor_name,
):
    """Ensure a `name` given to `plot` replaces the earlier legend label.

    The display is first built with a custom name through either
    constructor, plotted once without a name, then plotted again with a
    different name; the line label must reflect the name in use each time.
    """
    features, target = data_binary
    first_name = "my hand-crafted name"
    classifier = LogisticRegression().fit(features, target)
    positive_scores = classifier.predict_proba(features)[:, 1]

    # safe guard for the binary if/else construction
    assert constructor_name in ("from_estimator", "from_predictions")

    if constructor_name == "from_predictions":
        display = Display.from_predictions(target, positive_scores, name=first_name)
    else:
        display = Display.from_estimator(
            classifier, features, target, name=first_name
        )
    assert display.estimator_name == first_name

    # Plotting without a name keeps the one given at construction.
    pyplot.close("all")
    display.plot()
    assert first_name in display.line_.get_label()

    # Plotting with an explicit name overrides the label.
    pyplot.close("all")
    second_name = "another_name"
    display.plot(name=second_name)
    assert second_name in display.line_.get_label()
@pytest.mark.parametrize(
    "clf",
    [
        LogisticRegression(),
        make_pipeline(StandardScaler(), LogisticRegression()),
        make_pipeline(
            make_column_transformer((StandardScaler(), [0, 1])), LogisticRegression()
        ),
    ],
)
@pytest.mark.parametrize(
    "Display", [DetCurveDisplay, PrecisionRecallDisplay, RocCurveDisplay]
)
def test_display_curve_not_fitted_errors(pyplot, data_binary, clf, Display):
    """Ensure `from_estimator` raises `NotFittedError` on an unfitted model.

    After the error is observed the model is fitted, and the resulting
    display must carry the model's class name both as `estimator_name`
    and inside the line label.
    """
    features, target = data_binary

    # The parametrized `clf` instance is shared between test cases, so work
    # on a clone; otherwise fitting it here would leave it fitted for the
    # following cases.
    estimator = clone(clf)
    with pytest.raises(NotFittedError):
        Display.from_estimator(estimator, features, target)

    estimator.fit(features, target)
    display = Display.from_estimator(estimator, features, target)

    expected_name = estimator.__class__.__name__
    assert expected_name in display.line_.get_label()
    assert display.estimator_name == expected_name
|