1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166
|
import numpy as np
from ..base import BaseEstimator, ClassifierMixin
from .validation import _num_samples, check_array
class ArraySlicingWrapper:
    """Indexing helper used as the ``iloc`` attribute of ``MockDataFrame``.

    Parameters
    ----------
    array
        The wrapped object. It is indexed directly with whatever key is
        handed to ``__getitem__``.
    """

    def __init__(self, array):
        self.array = array

    def __getitem__(self, aslice):
        """Index the wrapped array and wrap the result in a MockDataFrame."""
        selection = self.array[aslice]
        return MockDataFrame(selection)
class MockDataFrame:
    """Minimal data-frame stand-in wrapping an array.

    Parameters
    ----------
    array
        Object exposing ``shape`` and ``ndim``. It is stored under both the
        ``array`` and ``values`` attributes.
    """

    # Instances have a shape and a length but deliberately define no
    # ``__getitem__``; positional access is only available through ``iloc``.
    def __init__(self, array):
        self.array = self.values = array
        self.shape, self.ndim = array.shape, array.ndim
        # ugly hack to make iloc work.
        self.iloc = ArraySlicingWrapper(array)

    def __len__(self):
        """Return the length of the wrapped array."""
        n_rows = len(self.array)
        return n_rows

    def __array__(self, dtype=None):
        """Return the wrapped array unchanged; ``dtype`` is ignored."""
        # Pandas data frames are array-like too: this hook exists so tests
        # can make sure that input validation in cross-validation does not
        # try to call it.
        return self.array

    def __eq__(self, other):
        """Compare the wrapped arrays and wrap the result in a MockDataFrame."""
        compared = self.array == other.array
        return MockDataFrame(compared)

    def __ne__(self, other):
        """Return the boolean negation of ``self == other``."""
        are_equal = self == other
        return not are_equal
class CheckingClassifier(ClassifierMixin, BaseEstimator):
    """Dummy classifier to test pipelining and meta-estimators.

    Checks some property of X and y in fit / predict.
    This allows testing whether pipelines / cross-validation or metaestimators
    changed the input.

    Parameters
    ----------
    check_y : callable, default=None
        If not None, called as ``check_y(y)`` during ``fit``; the result
        must be truthy or an ``AssertionError`` is raised.
    check_X : callable, default=None
        If not None, called on the input of ``fit`` and of ``predict``; the
        result must be truthy or an ``AssertionError`` is raised.
    foo_param : int, default=0
        Dummy parameter; ``score`` returns 1. when it is greater than 1 and
        0. otherwise.
    expected_fit_params : iterable of str, default=None
        Names that must all be present among the keyword arguments of
        ``fit``. When set (and non-empty), every fit parameter must also
        have the same length as ``X``.

    Attributes
    ----------
    classes_ : ndarray
        Unique values of ``y`` seen during ``fit``.
    """

    def __init__(self, check_y=None, check_X=None, foo_param=0,
                 expected_fit_params=None):
        self.check_y = check_y
        self.check_X = check_X
        self.foo_param = foo_param
        self.expected_fit_params = expected_fit_params

    def fit(self, X, y, **fit_params):
        """
        Fit classifier

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training vector, where n_samples is the number of samples and
            n_features is the number of features.
        y : array-like of shape (n_samples, n_output) or (n_samples,)
            Target relative to X for classification or regression.
        **fit_params : dict of string -> object
            Parameters passed to the ``fit`` method of the estimator

        Returns
        -------
        self

        Raises
        ------
        AssertionError
            If X and y differ in length, if ``check_X`` / ``check_y`` return
            a falsy value, or if the ``expected_fit_params`` checks fail.
        """
        assert len(X) == len(y)
        if self.check_X is not None:
            assert self.check_X(X)
        if self.check_y is not None:
            assert self.check_y(y)
        # NOTE(review): this stores len(X) (the length of the first axis),
        # not a feature count, despite the attribute name. Left unchanged
        # because callers may rely on the current value.
        self.n_features_in_ = len(X)
        self.classes_ = np.unique(check_array(y, ensure_2d=False,
                                              allow_nd=True))
        if self.expected_fit_params:
            missing = set(self.expected_fit_params) - set(fit_params)
            assert len(missing) == 0, 'Expected fit parameter(s) %s not ' \
                                      'seen.' % list(missing)
            for key, value in fit_params.items():
                assert len(value) == len(X), (
                    'Fit parameter %s has length %d; '
                    'expected %d.'
                    % (key, len(value), len(X)))
        return self

    def predict(self, T):
        """
        Predict the first class seen in ``fit`` for every sample.

        Parameters
        ----------
        T : indexable, length n_samples

        Returns
        -------
        ndarray of length n_samples
            ``classes_[0]`` repeated once per sample in T.
        """
        if self.check_X is not None:
            assert self.check_X(T)
        # The builtin ``int`` replaces the ``np.int`` alias, which was
        # deprecated in NumPy 1.20 and removed in NumPy 1.24.
        return self.classes_[np.zeros(_num_samples(T), dtype=int)]

    def score(self, X=None, Y=None):
        """
        Return a dummy score that depends only on ``foo_param``.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Input data, where n_samples is the number of samples and
            n_features is the number of features. Ignored.
        Y : array-like of shape (n_samples, n_output) or (n_samples,), optional
            Target relative to X for classification or regression;
            None for unsupervised learning. Ignored.

        Returns
        -------
        float
            1. if ``foo_param > 1``, otherwise 0.
        """
        if self.foo_param > 1:
            score = 1.
        else:
            score = 0.
        return score

    def _more_tags(self):
        return {'_skip_test': True, 'X_types': ['1dlabel']}
class NoSampleWeightWrapper(BaseEstimator):
    """Wrap estimator which will not expose `sample_weight`.

    ``fit`` accepts only ``X`` and ``y``; every call is forwarded to the
    wrapped estimator.

    Parameters
    ----------
    est : estimator, default=None
        The estimator to wrap.
    """

    def __init__(self, est=None):
        self.est = est

    def fit(self, X, y):
        """Fit the wrapped estimator on ``(X, y)`` and return its result."""
        wrapped = self.est
        return wrapped.fit(X, y)

    def predict(self, X):
        """Return the wrapped estimator's ``predict(X)``."""
        wrapped = self.est
        return wrapped.predict(X)

    def predict_proba(self, X):
        """Return the wrapped estimator's ``predict_proba(X)``."""
        wrapped = self.est
        return wrapped.predict_proba(X)

    def _more_tags(self):
        tags = {'_skip_test': True}
        return tags
|