1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131
|
import numpy as np
from joblib import Parallel, delayed
from sklearn.base import BaseEstimator, RegressorMixin, clone
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.utils import check_random_state
def _parallel_fit(regressor, X, y):
    """Fit a single regressor; module-level so it can be wrapped in `delayed`.

    Parameters
    ----------
    regressor : object exposing ``fit(X, y)``
        Estimator to train.
    X : array-like
        Training vectors, forwarded unchanged to ``regressor.fit``.
    y : array-like
        Target values, forwarded unchanged to ``regressor.fit``.

    Returns
    -------
    object
        Whatever ``regressor.fit(X, y)`` returns.
    """
    fitted = regressor.fit(X, y)
    return fitted
class GradientBoostingQuantileRegressor(RegressorMixin, BaseEstimator):
    """Predict several quantiles with one estimator.

    This is a wrapper around `GradientBoostingRegressor`'s quantile
    regression that allows you to predict several `quantiles` in
    one go.

    Parameters
    ----------
    quantiles : array-like
        Quantiles to predict. By default the 16, 50 and 84%
        quantiles are predicted.

    base_estimator : GradientBoostingRegressor instance or None (default)
        Quantile regressor used to make predictions. Only instances
        of `GradientBoostingRegressor` are supported. Use this to change
        the hyper-parameters of the estimator. The instance only serves
        as a template: `fit` works on clones and leaves it unmodified.

    n_jobs : int, default=1
        The number of jobs to run in parallel for `fit`.
        If -1, then the number of jobs is set to the number of cores.

    random_state : int, RandomState instance, or None (default)
        Set random state to something other than None for reproducible
        results.

    Attributes
    ----------
    regressors_ : list
        The fitted regressors, one per entry of `quantiles` and in the
        same order. Set by `fit`.
    """

    def __init__(
        self,
        quantiles=None,
        base_estimator=None,
        n_jobs=1,
        random_state=None,
    ):
        if quantiles is None:
            quantiles = [0.16, 0.5, 0.84]
        self.quantiles = quantiles
        self.random_state = random_state
        self.base_estimator = base_estimator
        self.n_jobs = n_jobs

    def fit(self, X, y):
        """Fit one regressor for each quantile.

        Parameters
        ----------
        X : array-like, shape=(n_samples, n_features)
            Training vectors, where `n_samples` is the number of samples
            and `n_features` is the number of features.

        y : array-like, shape=(n_samples,)
            Target values (real numbers in regression)

        Returns
        -------
        self
            The fitted estimator.

        Raises
        ------
        ValueError
            If `base_estimator` is not a `GradientBoostingRegressor` or
            does not use the quantile loss.
        """
        rng = check_random_state(self.random_state)

        if self.base_estimator is None:
            template = GradientBoostingRegressor(loss='quantile')
        else:
            if not isinstance(self.base_estimator, GradientBoostingRegressor):
                raise ValueError(
                    'base_estimator has to be of type'
                    ' GradientBoostingRegressor.'
                )
            if self.base_estimator.loss != 'quantile':
                raise ValueError(
                    'base_estimator has to use quantile'
                    ' loss not %s' % self.base_estimator.loss
                )
            # Configure a copy so the estimator handed in by the caller is
            # not modified as a side effect of fitting.
            template = clone(self.base_estimator)

        # The predictions for different quantiles should be sorted.
        # Therefore each of the regressors need the same seed: every clone
        # below is made from this one template carrying `rng`.
        template.set_params(random_state=rng)

        regressors = [
            clone(template).set_params(alpha=q) for q in self.quantiles
        ]

        self.regressors_ = Parallel(n_jobs=self.n_jobs, backend='threading')(
            delayed(_parallel_fit)(regressor, X, y) for regressor in regressors
        )
        return self

    def _quantile_index(self, q):
        """Return the position of the first entry of `quantiles` equal to `q`.

        Works for any array-like `quantiles` (list, tuple, ndarray), unlike
        `list.index`. Raises `ValueError` if `q` is not present.
        """
        matches = np.flatnonzero(np.asarray(self.quantiles) == q)
        if matches.size == 0:
            raise ValueError(
                "quantile %s is not among the quantiles given at "
                "instantiation: %r" % (q, self.quantiles)
            )
        return int(matches[0])

    def predict(self, X, return_std=False, return_quantiles=False):
        """Predict.

        By default the prediction of the 0.5 quantile (the median) is
        returned. If `return_quantiles` is set to True, the predictions
        for every quantile are returned instead. Otherwise, if
        `return_std` is set to True, return the median prediction
        and the predicted standard deviation, which is approximated as
        the (0.84th quantile - 0.16th quantile) divided by 2.0

        Parameters
        ----------
        X : array-like, shape=(n_samples, n_features)
            where `n_samples` is the number of samples
            and `n_features` is the number of features.

        return_std : bool, default=False
            Also return the approximated standard deviation. Requires
            0.16, 0.5 and 0.84 to be among `quantiles`. Ignored when
            `return_quantiles` is True.

        return_quantiles : bool, default=False
            Return the predictions of all quantiles; takes precedence
            over `return_std`.

        Returns
        -------
        array or tuple of arrays
            * `return_quantiles=True`: the stacked per-regressor
              predictions, transposed so that column `i` belongs to
              `quantiles[i]`.
            * `return_std=True`: tuple `(median, std)`.
            * otherwise: the prediction of the 0.5 quantile.

        Raises
        ------
        ValueError
            If a quantile needed for the requested output was not
            passed at instantiation.
        """
        # Row i holds the prediction of the regressor fitted for
        # quantiles[i]; every branch below reuses these rows instead of
        # calling `predict` on the regressors a second time.
        predicted_quantiles = np.asarray(
            [rgr.predict(X) for rgr in self.regressors_]
        )

        if return_quantiles:
            return predicted_quantiles.T

        if return_std:
            std_quantiles = [0.16, 0.5, 0.84]
            if not np.all(np.isin(std_quantiles, self.quantiles)):
                raise ValueError(
                    "return_std works only if the quantiles during "
                    "instantiation include 0.16, 0.5 and 0.84"
                )
            low = predicted_quantiles[self._quantile_index(0.16)]
            high = predicted_quantiles[self._quantile_index(0.84)]
            mean = predicted_quantiles[self._quantile_index(0.5)]
            return mean, ((high - low) / 2.0)

        # Default: the median prediction.
        return predicted_quantiles[self._quantile_index(0.5)]
|