"""
Generalized Linear models.
"""
# Author: Alexandre Gramfort <alexandre.gramfort@inria.fr>
# Fabian Pedregosa <fabian.pedregosa@inria.fr>
# Olivier Grisel <olivier.grisel@ensta.org>
# Vincent Michel <vincent.michel@inria.fr>
# Peter Prettenhofer <peter.prettenhofer@gmail.com>
# Mathieu Blondel <mathieu@mblondel.org>
#
# License: BSD Style.
from abc import ABCMeta, abstractmethod
import numpy as np
import scipy.sparse as sp
from scipy import linalg
import scipy.sparse.linalg as sp_linalg
from ..externals.joblib import Parallel, delayed
from ..base import BaseEstimator
from ..base import RegressorMixin
from ..utils.extmath import safe_sparse_dot
from ..utils import array2d, as_float_array, safe_asarray
from ..utils.fixes import lsqr
###
### TODO: intercept for all models
### We should define a common function to center data instead of
### repeating the same code inside each fit method.
### TODO: bayesian_ridge_regression and bayesian_regression_ard
### should be squashed into their respective objects.
def center_data(X, y, fit_intercept, normalize=False, copy=True):
"""
Centers data to have mean zero along axis 0. This is here because
nearly all linear models will want their data to be centered.
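
    A minimal usage sketch (illustrative values; dense input assumed):

    >>> import numpy as np
    >>> X = np.array([[1., 2.], [3., 4.], [5., 6.]])
    >>> y = np.array([1., 2., 3.])
    >>> Xc, yc, X_mean, y_mean, X_std = center_data(X, y, fit_intercept=True)
    >>> bool(np.allclose(Xc.mean(axis=0), 0.0))
    True
    >>> float(y_mean)
    2.0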
"""
X = as_float_array(X, copy)
if fit_intercept:
if sp.issparse(X):
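            # Sparse X cannot be centered without densifying it, so X is
            # left as-is here and only y is centered below.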
X_mean = np.zeros(X.shape[1])
X_std = np.ones(X.shape[1])
else:
X_mean = X.mean(axis=0)
X -= X_mean
if normalize:
X_std = np.sqrt(np.sum(X ** 2, axis=0))
X_std[X_std == 0] = 1
X /= X_std
else:
X_std = np.ones(X.shape[1])
y_mean = y.mean(axis=0)
y = y - y_mean
else:
X_mean = np.zeros(X.shape[1])
X_std = np.ones(X.shape[1])
y_mean = 0. if y.ndim == 1 else np.zeros(y.shape[1], dtype=X.dtype)
return X, y, X_mean, y_mean, X_std
class LinearModel(BaseEstimator, RegressorMixin):
"""Base class for Linear Models"""
def decision_function(self, X):
"""Decision function of the linear model
Parameters
----------
X : numpy array of shape [n_samples, n_features]
Returns
-------
C : array, shape = [n_samples]
Returns predicted values.
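
        Notes
        -----
        For dense ``X`` this is equivalent to
        ``np.dot(X, self.coef_.T) + self.intercept_``.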
"""
X = safe_asarray(X)
return safe_sparse_dot(X, self.coef_.T) + self.intercept_
def predict(self, X):
"""Predict using the linear model
Parameters
----------
X : numpy array of shape [n_samples, n_features]
Returns
-------
C : array, shape = [n_samples]
Returns predicted values.
"""
return self.decision_function(X)
_center_data = staticmethod(center_data)
def _set_intercept(self, X_mean, y_mean, X_std):
"""Set the intercept_
"""
if self.fit_intercept:
self.coef_ = self.coef_ / X_std
self.intercept_ = y_mean - np.dot(X_mean, self.coef_.T)
else:
self.intercept_ = 0
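
    # Algebra behind ``_set_intercept`` (illustrative numbers, not from the
    # codebase): the model is fit on centered data, y - y_mean = (X - X_mean)w,
    # so on the original scale the offset is b = y_mean - X_mean . w, with w
    # rescaled by X_std when ``normalize`` was used. For example:
    #
    #     X_mean, y_mean = np.array([3., 4.]), 2.0
    #     coef = np.array([0.5, 0.25])
    #     y_mean - np.dot(X_mean, coef)    # -> -0.5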
class LinearRegression(LinearModel):
"""
Ordinary least squares Linear Regression.
    Parameters
    ----------
    fit_intercept : boolean, optional
        Whether to calculate the intercept for this model. If set
        to False, no intercept will be used in calculations
        (e.g. data is expected to be already centered).

    normalize : boolean, optional
        If True, the regressors X are normalized before regression.

    copy_X : boolean, optional, default True
        If True, X will be copied; else, it may be overwritten.

    Attributes
    ----------
    `coef_` : array
        Estimated coefficients for the linear regression problem.

    `intercept_` : array
        Independent term in the linear model.

Notes
-----
From the implementation point of view, this is just plain Ordinary
Least Squares (numpy.linalg.lstsq) wrapped as a predictor object.
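
    Examples
    --------
    A small illustrative fit (the data follow y = 2 * x + 1 exactly):

    >>> import numpy as np
    >>> X = np.array([[0.], [1.], [2.]])
    >>> y = np.array([1., 3., 5.])
    >>> reg = LinearRegression().fit(X, y)
    >>> bool(np.allclose(reg.coef_, [2.]))
    True
    >>> bool(np.allclose(reg.intercept_, 1.))
    True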
"""
def __init__(self, fit_intercept=True, normalize=False, copy_X=True):
self.fit_intercept = fit_intercept
self.normalize = normalize
self.copy_X = copy_X
def fit(self, X, y, n_jobs=1):
"""
Fit linear model.
Parameters
----------
        X : numpy array or sparse matrix of shape [n_samples, n_features]
            Training data

        y : numpy array of shape [n_samples, n_responses]
            Target values

        n_jobs : int, optional, default 1
            The number of jobs to use for the computation. If -1, all
            CPUs are used. This will only provide a speedup in the case
            of sufficiently large problems with n_responses > 1.

Returns
-------
self : returns an instance of self.
"""
X = safe_asarray(X)
y = np.asarray(y)
X, y, X_mean, y_mean, X_std = self._center_data(X, y,
self.fit_intercept, self.normalize, self.copy_X)
if sp.issparse(X):
if y.ndim < 2:
out = lsqr(X, y)
self.coef_ = out[0]
self.residues_ = out[3]
else:
# sparse_lstsq cannot handle y with shape (M, K)
outs = Parallel(n_jobs=n_jobs)(delayed(lsqr)
(X, y[:, j].ravel()) for j in range(y.shape[1]))
            self.coef_ = np.vstack([out[0] for out in outs])
            self.residues_ = np.vstack([out[3] for out in outs])
else:
self.coef_, self.residues_, self.rank_, self.singular_ = \
linalg.lstsq(X, y)
self.coef_ = self.coef_.T
self._set_intercept(X_mean, y_mean, X_std)
return self
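
# A quick sketch of the sparse, multi-target path in LinearRegression.fit
# above (shapes illustrative only):
#
#     X = sp.csr_matrix(np.eye(10))     # sparse design matrix, 10 features
#     Y = np.random.rand(10, 3)         # three response columns
#     reg = LinearRegression().fit(X, Y, n_jobs=2)
#     reg.coef_.shape                   # -> (3, 10), one row per response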
##
## Stochastic Gradient Descent (SGD) abstract base class
##
class BaseSGD(BaseEstimator):
"""Base class for dense and sparse SGD."""
__metaclass__ = ABCMeta
def __init__(self, loss, penalty='l2', alpha=0.0001,
rho=0.85, fit_intercept=True, n_iter=5, shuffle=False,
verbose=0, seed=0, learning_rate="optimal", eta0=0.0,
power_t=0.5, warm_start=False):
self.loss = str(loss)
self.penalty = str(penalty).lower()
self._set_loss_function(self.loss)
self._set_penalty_type(self.penalty)
self.alpha = float(alpha)
        if self.alpha < 0.0:
            raise ValueError("alpha must be non-negative")
self.rho = float(rho)
if self.rho < 0.0 or self.rho > 1.0:
raise ValueError("rho must be in [0, 1]")
self.fit_intercept = bool(fit_intercept)
self.n_iter = int(n_iter)
if self.n_iter <= 0:
raise ValueError("n_iter must be greater than zero")
if not isinstance(shuffle, bool):
raise ValueError("shuffle must be either True or False")
self.shuffle = bool(shuffle)
self.seed = seed
self.verbose = int(verbose)
self.learning_rate = str(learning_rate)
self._set_learning_rate(self.learning_rate)
self.eta0 = float(eta0)
self.power_t = float(power_t)
if self.learning_rate != "optimal":
if eta0 <= 0.0:
raise ValueError("eta0 must be greater than 0.0")
self.coef_ = None
self.warm_start = warm_start
self._init_t()
@abstractmethod
def fit(self, X, y):
"""Fit model."""
@abstractmethod
def predict(self, X):
"""Predict using model."""
def _init_t(self):
self.t_ = 1.0
if self.learning_rate == "optimal":
typw = np.sqrt(1.0 / np.sqrt(self.alpha))
# computing eta0, the initial learning rate
eta0 = typw / max(1.0, self.loss_function.dloss(-typw, 1.0))
# initialize t such that eta at first example equals eta0
self.t_ = 1.0 / (eta0 * self.alpha)
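
    # With the "optimal" schedule the step size decays as
    # eta_t = 1.0 / (alpha * t), so seeding t_ = 1.0 / (eta0 * alpha) makes
    # the step at the first example equal eta0. Illustrative numbers,
    # assuming the dloss term evaluates to 1.0:
    #
    #     alpha = 0.0001
    #     typw = np.sqrt(1.0 / np.sqrt(alpha))    # == 10.0
    #     eta0 = typw / 1.0                       # == 10.0
    #     t_ = 1.0 / (eta0 * alpha)               # == 1000.0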
def _set_learning_rate(self, learning_rate):
learning_rate_codes = {"constant": 1, "optimal": 2, "invscaling": 3}
try:
self.learning_rate_code = learning_rate_codes[learning_rate]
except KeyError:
raise ValueError("learning rate %s"
"is not supported. " % learning_rate)
def _set_loss_function(self, loss):
"""Get concrete LossFunction"""
raise NotImplementedError("BaseSGD is an abstract class.")
def _set_penalty_type(self, penalty):
penalty_types = {"none": 0, "l2": 2, "l1": 1, "elasticnet": 3}
try:
self.penalty_type = penalty_types[penalty]
except KeyError:
raise ValueError("Penalty %s is not supported. " % penalty)
def _validate_sample_weight(self, sample_weight, n_samples):
"""Set the sample weight array."""
        if sample_weight is None:
# uniform sample weights
sample_weight = np.ones(n_samples, dtype=np.float64, order='C')
else:
# user-provided array
sample_weight = np.asarray(sample_weight, dtype=np.float64,
order="C")
if sample_weight.shape[0] != n_samples:
raise ValueError("Shapes of X and sample_weight do not match.")
return sample_weight
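
    # Illustrative behaviour (values not from the codebase): passing
    # sample_weight=None yields uniform weights, e.g.
    #
    #     self._validate_sample_weight(None, 3)   # -> array([ 1.,  1.,  1.])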
def _set_coef(self, coef_):
"""Make sure that coef_ is fortran-style and 2d.
Fortran-style memory layout is needed to ensure that computing
the dot product between input ``X`` and ``coef_`` does not trigger
a memory copy.
"""
self.coef_ = np.asfortranarray(array2d(coef_))
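
    # A quick illustration of the layout guarantee (illustrative only):
    #
    #     w = np.asfortranarray(np.zeros((2, 3)))
    #     w.flags['F_CONTIGUOUS']    # -> True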
def _allocate_parameter_mem(self, n_classes, n_features, coef_init=None,
intercept_init=None):
"""Allocate mem for parameters; initialize if provided."""
if n_classes > 2:
# allocate coef_ for multi-class
if coef_init is not None:
coef_init = np.asarray(coef_init, order="C")
if coef_init.shape != (n_classes, n_features):
raise ValueError("Provided coef_ does not match dataset. ")
self.coef_ = coef_init
else:
self.coef_ = np.zeros((n_classes, n_features),
dtype=np.float64, order="C")
# allocate intercept_ for multi-class
if intercept_init is not None:
intercept_init = np.asarray(intercept_init, order="C")
if intercept_init.shape != (n_classes, ):
raise ValueError("Provided intercept_init " \
"does not match dataset.")
self.intercept_ = intercept_init
else:
self.intercept_ = np.zeros(n_classes, dtype=np.float64,
order="C")
else:
# allocate coef_ for binary problem
if coef_init is not None:
coef_init = np.asarray(coef_init, dtype=np.float64,
order="C")
coef_init = coef_init.ravel()
if coef_init.shape != (n_features,):
raise ValueError("Provided coef_init does not " \
"match dataset.")
self.coef_ = coef_init
else:
self.coef_ = np.zeros(n_features, dtype=np.float64, order="C")
# allocate intercept_ for binary problem
if intercept_init is not None:
intercept_init = np.asarray(intercept_init, dtype=np.float64)
if intercept_init.shape != (1,) and intercept_init.shape != ():
raise ValueError("Provided intercept_init " \
"does not match dataset.")
self.intercept_ = intercept_init.reshape(1,)
else:
self.intercept_ = np.zeros(1, dtype=np.float64, order="C")
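
    # Shape summary for the allocations above:
    #   n_classes > 2:  coef_ has shape (n_classes, n_features),
    #                   intercept_ has shape (n_classes,)
    #   binary:         coef_ has shape (n_features,),
    #                   intercept_ has shape (1,)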
def _check_fit_data(self, X, y):
n_samples, _ = X.shape
if n_samples != y.shape[0]:
raise ValueError("Shapes of X and y do not match.")