1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213
|
"""
Testing for the partial dependence module.
"""
import pytest
import numpy as np
from numpy.testing import assert_array_equal
from sklearn.utils.testing import assert_raises
from sklearn.utils.testing import if_matplotlib
from sklearn.ensemble.partial_dependence import partial_dependence
from sklearn.ensemble.partial_dependence import plot_partial_dependence
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import GradientBoostingRegressor
from sklearn import datasets
# toy sample
X = [[-2, -1], [-1, -1], [-1, -2], [1, 1], [1, 2], [2, 1]]
y = [-1, -1, -1, 1, 1, 1]
T = [[-1, -1], [2, 2], [3, 2]]
true_result = [-1, 1, 1]
# also load the boston dataset
boston = datasets.load_boston()
# also load the iris dataset
iris = datasets.load_iris()
def test_partial_dependence_classifier():
# Test partial dependence for classifier
clf = GradientBoostingClassifier(n_estimators=10, random_state=1)
clf.fit(X, y)
pdp, axes = partial_dependence(clf, [0], X=X, grid_resolution=5)
# only 4 grid points instead of 5 because only 4 unique X[:,0] vals
assert pdp.shape == (1, 4)
assert axes[0].shape[0] == 4
# now with our own grid
X_ = np.asarray(X)
grid = np.unique(X_[:, 0])
pdp_2, axes = partial_dependence(clf, [0], grid=grid)
assert axes is None
assert_array_equal(pdp, pdp_2)
def test_partial_dependence_multiclass():
# Test partial dependence for multi-class classifier
clf = GradientBoostingClassifier(n_estimators=10, random_state=1)
clf.fit(iris.data, iris.target)
grid_resolution = 25
n_classes = clf.n_classes_
pdp, axes = partial_dependence(
clf, [0], X=iris.data, grid_resolution=grid_resolution)
assert pdp.shape == (n_classes, grid_resolution)
assert len(axes) == 1
assert axes[0].shape[0] == grid_resolution
def test_partial_dependence_regressor():
# Test partial dependence for regressor
clf = GradientBoostingRegressor(n_estimators=10, random_state=1)
clf.fit(boston.data, boston.target)
grid_resolution = 25
pdp, axes = partial_dependence(
clf, [0], X=boston.data, grid_resolution=grid_resolution)
assert pdp.shape == (1, grid_resolution)
assert axes[0].shape[0] == grid_resolution
def test_partial_dependecy_input():
# Test input validation of partial dependence.
clf = GradientBoostingClassifier(n_estimators=10, random_state=1)
clf.fit(X, y)
assert_raises(ValueError, partial_dependence,
clf, [0], grid=None, X=None)
assert_raises(ValueError, partial_dependence,
clf, [0], grid=[0, 1], X=X)
# first argument must be an instance of BaseGradientBoosting
assert_raises(ValueError, partial_dependence,
{}, [0], X=X)
# Gradient boosting estimator must be fit
assert_raises(ValueError, partial_dependence,
GradientBoostingClassifier(), [0], X=X)
assert_raises(ValueError, partial_dependence, clf, [-1], X=X)
assert_raises(ValueError, partial_dependence, clf, [100], X=X)
# wrong ndim for grid
grid = np.random.rand(10, 2, 1)
assert_raises(ValueError, partial_dependence, clf, [0], grid=grid)
@pytest.mark.filterwarnings('ignore: Using or importing the ABCs from')
# matplotlib Python3.7 warning
@if_matplotlib
def test_plot_partial_dependence():
# Test partial dependence plot function.
clf = GradientBoostingRegressor(n_estimators=10, random_state=1)
clf.fit(boston.data, boston.target)
grid_resolution = 25
fig, axs = plot_partial_dependence(clf, boston.data, [0, 1, (0, 1)],
grid_resolution=grid_resolution,
feature_names=boston.feature_names)
assert len(axs) == 3
assert all(ax.has_data for ax in axs)
# check with str features and array feature names
fig, axs = plot_partial_dependence(clf, boston.data, ['CRIM', 'ZN',
('CRIM', 'ZN')],
grid_resolution=grid_resolution,
feature_names=boston.feature_names)
assert len(axs) == 3
assert all(ax.has_data for ax in axs)
# check with list feature_names
feature_names = boston.feature_names.tolist()
fig, axs = plot_partial_dependence(clf, boston.data, ['CRIM', 'ZN',
('CRIM', 'ZN')],
grid_resolution=grid_resolution,
feature_names=feature_names)
assert len(axs) == 3
assert all(ax.has_data for ax in axs)
@pytest.mark.filterwarnings('ignore: Using or importing the ABCs from')
# matplotlib Python3.7 warning
@if_matplotlib
def test_plot_partial_dependence_input():
# Test partial dependence plot function input checks.
clf = GradientBoostingClassifier(n_estimators=10, random_state=1)
# not fitted yet
assert_raises(ValueError, plot_partial_dependence,
clf, X, [0])
clf.fit(X, y)
assert_raises(ValueError, plot_partial_dependence,
clf, np.array(X)[:, :0], [0])
# first argument must be an instance of BaseGradientBoosting
assert_raises(ValueError, plot_partial_dependence,
{}, X, [0])
# must be larger than -1
assert_raises(ValueError, plot_partial_dependence,
clf, X, [-1])
# too large feature value
assert_raises(ValueError, plot_partial_dependence,
clf, X, [100])
# str feature but no feature_names
assert_raises(ValueError, plot_partial_dependence,
clf, X, ['foobar'])
# not valid features value
assert_raises(ValueError, plot_partial_dependence,
clf, X, [{'foo': 'bar'}])
@pytest.mark.filterwarnings('ignore: Using or importing the ABCs from')
# matplotlib Python3.7 warning
@if_matplotlib
def test_plot_partial_dependence_multiclass():
# Test partial dependence plot function on multi-class input.
clf = GradientBoostingClassifier(n_estimators=10, random_state=1)
clf.fit(iris.data, iris.target)
grid_resolution = 25
fig, axs = plot_partial_dependence(clf, iris.data, [0, 1],
label=0,
grid_resolution=grid_resolution)
assert len(axs) == 2
assert all(ax.has_data for ax in axs)
# now with symbol labels
target = iris.target_names[iris.target]
clf = GradientBoostingClassifier(n_estimators=10, random_state=1)
clf.fit(iris.data, target)
grid_resolution = 25
fig, axs = plot_partial_dependence(clf, iris.data, [0, 1],
label='setosa',
grid_resolution=grid_resolution)
assert len(axs) == 2
assert all(ax.has_data for ax in axs)
# label not in gbrt.classes_
assert_raises(ValueError, plot_partial_dependence,
clf, iris.data, [0, 1], label='foobar',
grid_resolution=grid_resolution)
# label not provided
assert_raises(ValueError, plot_partial_dependence,
clf, iris.data, [0, 1],
grid_resolution=grid_resolution)
|