# Author: Alexandre Gramfort <alexandre.gramfort@inria.fr>
#         Fabian Pedregosa <fabian.pedregosa@inria.fr>
#         Olivier Grisel <olivier.grisel@ensta.org>
#
# License: BSD Style.
cimport numpy as np
import numpy as np
import numpy.linalg as linalg
cimport cython
from cpython cimport bool
import warnings
cdef extern from "math.h":
double fabs(double f)
double sqrt(double f)

cdef inline double fmax(double x, double y):
    if x > y:
        return x
    return y

cdef inline double fsign(double f):
    if f == 0:
        return 0
    elif f > 0:
        return 1.0
    else:
        return -1.0
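
# Note: fsign and fmax combine in the coordinate updates below into the
# soft-thresholding operator S(x, t) = sign(x) * max(|x| - t, 0). A minimal
# pure-NumPy sketch of that operator, for reference only (this helper is not
# part of this module):
#
#     import numpy as np
#
#     def soft_threshold(x, t):
#         # shrink x toward zero by t, truncating at zero
#         return np.sign(x) * np.maximum(np.abs(x) - t, 0.0)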
cdef extern from "cblas.h":
void daxpy "cblas_daxpy"(int N, double alpha, double *X, int incX,
double *Y, int incY)
double ddot "cblas_ddot"(int N, double *X, int incX, double *Y, int incY)

ctypedef np.float64_t DOUBLE


@cython.boundscheck(False)
@cython.wraparound(False)
@cython.cdivision(True)
def enet_coordinate_descent(np.ndarray[DOUBLE, ndim=1] w,
                            double alpha, double beta,
                            np.ndarray[DOUBLE, ndim=2] X,
                            np.ndarray[DOUBLE, ndim=1] y,
                            int max_iter, double tol, bool positive=False):
"""Cython version of the coordinate descent algorithm
for Elastic-Net regression
We minimize
1 norm(y - X w, 2)^2 + alpha norm(w, 1) + beta norm(w, 2)^2
- ----
2 2
"""
    # get the data information into easy vars
    cdef unsigned int n_samples = X.shape[0]
    cdef unsigned int n_features = X.shape[1]

    # compute norms of the columns of X
    cdef np.ndarray[DOUBLE, ndim=1] norm_cols_X = (X**2).sum(axis=0)

    # initial value of the residuals
    cdef np.ndarray[DOUBLE, ndim=1] R

    cdef double tmp
    cdef double w_ii
    cdef double d_w_max
    cdef double w_max
    cdef double d_w_ii
    cdef double gap = tol + 1.0
    cdef double d_w_tol = tol
    cdef unsigned int ii
    cdef unsigned int n_iter

    if alpha == 0:
        warnings.warn("Coordinate descent with alpha=0 may lead to unexpected"
                      " results and is discouraged.")

    R = y - np.dot(X, w)
    tol = tol * linalg.norm(y) ** 2

    for n_iter in range(max_iter):
        w_max = 0.0
        d_w_max = 0.0
        for ii in range(n_features):  # Loop over coordinates
            if norm_cols_X[ii] == 0.0:
                continue

            w_ii = w[ii]  # Store previous value

            if w_ii != 0.0:
                # R += w_ii * X[:,ii]
                daxpy(n_samples, w_ii,
                      <DOUBLE*>(X.data + ii * n_samples * sizeof(DOUBLE)), 1,
                      <DOUBLE*>R.data, 1)

            # tmp = (X[:,ii] * R).sum()
            tmp = ddot(n_samples,
                       <DOUBLE*>(X.data + ii * n_samples * sizeof(DOUBLE)), 1,
                       <DOUBLE*>R.data, 1)

            if positive and tmp < 0:
                w[ii] = 0.0
            else:
                w[ii] = fsign(tmp) * fmax(fabs(tmp) - alpha, 0) \
                        / (norm_cols_X[ii] + beta)

            if w[ii] != 0.0:
                # R -= w[ii] * X[:,ii]  # Update residual
                daxpy(n_samples, -w[ii],
                      <DOUBLE*>(X.data + ii * n_samples * sizeof(DOUBLE)), 1,
                      <DOUBLE*>R.data, 1)

            # update the maximum absolute coefficient update
            d_w_ii = fabs(w[ii] - w_ii)
            if d_w_ii > d_w_max:
                d_w_max = d_w_ii

            if fabs(w[ii]) > w_max:
                w_max = fabs(w[ii])
        if w_max == 0.0 or d_w_max / w_max < d_w_tol or n_iter == max_iter - 1:
            # the biggest coordinate update of this iteration was smaller than
            # the tolerance: check the duality gap as ultimate stopping
            # criterion
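            # (The gap is evaluated at a dual-feasible point obtained by
            # scaling the residual: when ||X^T R - beta w||_inf exceeds
            # alpha, R is scaled by const = alpha / ||X^T R - beta w||_inf
            # so that the scaled residual satisfies the dual constraints of
            # the elastic-net problem.)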
            XtA = np.dot(X.T, R) - beta * w
            if positive:
                dual_norm_XtA = np.max(XtA)
            else:
                dual_norm_XtA = linalg.norm(XtA, np.inf)

            # TODO: use squared L2 norm directly
            R_norm = linalg.norm(R)
            w_norm = linalg.norm(w, 2)
            if dual_norm_XtA > alpha:
                const = alpha / dual_norm_XtA
                A_norm = R_norm * const
                gap = 0.5 * (R_norm**2 + A_norm**2)
            else:
                const = 1.0
                gap = R_norm**2

            gap += alpha * linalg.norm(w, 1) - const * np.dot(R.T, y) + \
                   0.5 * beta * (1 + const**2) * (w_norm**2)

            if gap < tol:
                # return if we reached the desired tolerance
                break

    return w, gap, tol
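
# A minimal usage sketch, for illustration only. It assumes this file is
# compiled into an extension module named `cd_fast` (a name not given here).
# Note that the pointer arithmetic above walks each column of X as a
# contiguous block of n_samples doubles, so X must be Fortran-ordered
# (column-major):
#
#     import numpy as np
#     from cd_fast import enet_coordinate_descent
#
#     rng = np.random.RandomState(0)
#     X = np.asfortranarray(rng.randn(50, 10))
#     y = rng.randn(50)
#     w = np.zeros(10)
#     w, gap, tol = enet_coordinate_descent(w, 0.5, 0.1, X, y,
#                                           max_iter=1000, tol=1e-4)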

@cython.boundscheck(False)
@cython.wraparound(False)
@cython.cdivision(True)
def enet_coordinate_descent_gram(np.ndarray[DOUBLE, ndim=1] w,
                                 double alpha, double beta,
                                 np.ndarray[DOUBLE, ndim=2] Q,
                                 np.ndarray[DOUBLE, ndim=1] q,
                                 np.ndarray[DOUBLE, ndim=1] y,
                                 int max_iter, double tol, bool positive=False):
"""Cython version of the coordinate descent algorithm
for Elastic-Net regression
We minimize
1 w^T Q w - q^T w + alpha norm(w, 1) + beta norm(w, 2)^2
- ----
2 2
which amount to the Elastic-Net problem when:
Q = X^T X (Gram matrix)
q = X^T y
"""
    # get the data information into easy vars
    cdef unsigned int n_samples = y.shape[0]
    cdef unsigned int n_features = Q.shape[0]

    # initial value of "Q w", which will be kept up to date during the
    # iterations
    cdef np.ndarray[DOUBLE, ndim=1] H = np.dot(Q, w)

    cdef double tmp
    cdef double w_ii
    cdef double d_w_max
    cdef double w_max
    cdef double d_w_ii
    cdef double gap = tol + 1.0
    cdef double d_w_tol = tol
    cdef unsigned int ii
    cdef unsigned int n_iter

    cdef double y_norm2 = linalg.norm(y) ** 2
    tol = tol * y_norm2

    if alpha == 0:
        warnings.warn("Coordinate descent with alpha=0 may lead to unexpected"
                      " results and is discouraged.")
    for n_iter in range(max_iter):
        w_max = 0.0
        d_w_max = 0.0
        for ii in range(n_features):  # Loop over coordinates
            if Q[ii, ii] == 0.0:
                continue

            w_ii = w[ii]  # Store previous value

            if w_ii != 0.0:
                # H -= w_ii * Q[ii]
                daxpy(n_features, -w_ii,
                      <DOUBLE*>(Q.data + ii * n_features * sizeof(DOUBLE)), 1,
                      <DOUBLE*>H.data, 1)

            tmp = q[ii] - H[ii]

            if positive and tmp < 0:
                w[ii] = 0.0
            else:
                w[ii] = fsign(tmp) * fmax(fabs(tmp) - alpha, 0) \
                        / (Q[ii, ii] + beta)

            if w[ii] != 0.0:
                # H += w[ii] * Q[ii]  # Update H = X.T X w
                daxpy(n_features, w[ii],
                      <DOUBLE*>(Q.data + ii * n_features * sizeof(DOUBLE)), 1,
                      <DOUBLE*>H.data, 1)

            # update the maximum absolute coefficient update
            d_w_ii = fabs(w[ii] - w_ii)
            if d_w_ii > d_w_max:
                d_w_max = d_w_ii

            if fabs(w[ii]) > w_max:
                w_max = fabs(w[ii])
        if w_max == 0.0 or d_w_max / w_max < d_w_tol or n_iter == max_iter - 1:
            # the biggest coordinate update of this iteration was smaller than
            # the tolerance: check the duality gap as ultimate stopping
            # criterion
            q_dot_w = np.dot(w, q)
            XtA = q - H - beta * w
            if positive:
                dual_norm_XtA = np.max(XtA)
            else:
                dual_norm_XtA = linalg.norm(XtA, np.inf)

            # R_norm2 = ||y - X w||^2 expanded via the precomputed
            # quantities: y^T y + w^T Q w - 2 q^T w
            R_norm2 = y_norm2 + np.sum(w * H) - 2.0 * q_dot_w
            w_norm = linalg.norm(w, 2)
            if dual_norm_XtA > alpha:
                const = alpha / dual_norm_XtA
                A_norm2 = R_norm2 * (const**2)
                gap = 0.5 * (R_norm2 + A_norm2)
            else:
                const = 1.0
                gap = R_norm2

            gap += alpha * linalg.norm(w, 1) \
                   - const * y_norm2 \
                   + const * q_dot_w \
                   + 0.5 * beta * (1 + const**2) * (w_norm**2)

            if gap < tol:
                # return if we reached the desired tolerance
                break

    return w, gap, tol
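
# A minimal sketch of the Gram variant, under the same module-name assumption
# as above. Precomputing Q = X^T X and q = X^T y makes each coordinate sweep
# cost O(n_features^2) instead of O(n_samples * n_features), which pays off
# when n_samples >> n_features:
#
#     Q = np.dot(X.T, X)
#     q = np.dot(X.T, y)
#     w2 = np.zeros(10)
#     w2, gap2, tol2 = enet_coordinate_descent_gram(w2, 0.5, 0.1, Q, q, y,
#                                                   max_iter=1000, tol=1e-4)
#     # w2 should agree with the w returned above up to the tolerance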