1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864
|
#!/usr/bin/python
# -*- coding: utf-8 -*-
# Author: Vincent Dubourg <vincent.dubourg@gmail.com>
# (mostly translation, see implementation details)
# License: BSD style
import numpy as np
from scipy import linalg, optimize, rand
from ..base import BaseEstimator, RegressorMixin
from ..metrics.pairwise import manhattan_distances
from ..utils import array2d, check_random_state
from . import regression_models as regression
from . import correlation_models as correlation
MACHINE_EPSILON = np.finfo(np.double).eps
if hasattr(linalg, 'solve_triangular'):
# only in scipy since 0.9
solve_triangular = linalg.solve_triangular
else:
# slower, but works
def solve_triangular(x, y, lower=True):
return linalg.solve(x, y)
def l1_cross_distances(X):
"""
Computes the nonzero componentwise L1 cross-distances between the vectors
in X.
Parameters
----------
X: array_like
An array with shape (n_samples, n_features)
Returns
-------
D: array with shape (n_samples * (n_samples - 1) / 2, n_features)
The array of componentwise L1 cross-distances.
ij: arrays with shape (n_samples * (n_samples - 1) / 2, 2)
The indices i and j of the vectors in X associated to the cross-
distances in D: D[k] = np.abs(X[ij[k, 0]] - Y[ij[k, 1]]).
"""
X = array2d(X)
n_samples, n_features = X.shape
n_nonzero_cross_dist = n_samples * (n_samples - 1) / 2
ij = np.zeros((n_nonzero_cross_dist, 2), dtype=np.int)
D = np.zeros((n_nonzero_cross_dist, n_features))
ll_1 = 0
for k in range(n_samples - 1):
ll_0 = ll_1
ll_1 = ll_0 + n_samples - k - 1
ij[ll_0:ll_1, 0] = k
ij[ll_0:ll_1, 1] = np.arange(k + 1, n_samples)
D[ll_0:ll_1] = np.abs(X[k] - X[(k + 1):n_samples])
return D, ij.astype(np.int)
class GaussianProcess(BaseEstimator, RegressorMixin):
"""The Gaussian Process model class.
Parameters
----------
regr : string or callable, optional
A regression function returning an array of outputs of the linear
regression functional basis. The number of observations n_samples
should be greater than the size p of this basis.
Default assumes a simple constant regression trend.
Available built-in regression models are::
'constant', 'linear', 'quadratic'
corr : string or callable, optional
A stationary autocorrelation function returning the autocorrelation
between two points x and x'.
Default assumes a squared-exponential autocorrelation model.
Built-in correlation models are::
'absolute_exponential', 'squared_exponential',
'generalized_exponential', 'cubic', 'linear'
beta0 : double array_like, optional
The regression weight vector to perform Ordinary Kriging (OK).
Default assumes Universal Kriging (UK) so that the vector beta of
regression weights is estimated using the maximum likelihood
principle.
storage_mode : string, optional
A string specifying whether the Cholesky decomposition of the
correlation matrix should be stored in the class (storage_mode =
'full') or not (storage_mode = 'light').
Default assumes storage_mode = 'full', so that the
Cholesky decomposition of the correlation matrix is stored.
This might be a useful parameter when one is not interested in the
MSE and only plan to estimate the BLUP, for which the correlation
matrix is not required.
verbose : boolean, optional
A boolean specifying the verbose level.
Default is verbose = False.
theta0 : double array_like, optional
An array with shape (n_features, ) or (1, ).
The parameters in the autocorrelation model.
If thetaL and thetaU are also specified, theta0 is considered as
the starting point for the maximum likelihood rstimation of the
best set of parameters.
Default assumes isotropic autocorrelation model with theta0 = 1e-1.
thetaL : double array_like, optional
An array with shape matching theta0's.
Lower bound on the autocorrelation parameters for maximum
likelihood estimation.
Default is None, so that it skips maximum likelihood estimation and
it uses theta0.
thetaU : double array_like, optional
An array with shape matching theta0's.
Upper bound on the autocorrelation parameters for maximum
likelihood estimation.
Default is None, so that it skips maximum likelihood estimation and
it uses theta0.
normalize : boolean, optional
Input X and observations y are centered and reduced wrt
means and standard deviations estimated from the n_samples
observations provided.
Default is normalize = True so that data is normalized to ease
maximum likelihood estimation.
nugget : double or ndarray, optional
Introduce a nugget effect to allow smooth predictions from noisy
data. If nugget is an ndarray, it must be the same length as the
number of data points used for the fit.
The nugget is added to the diagonal of the assumed training covariance;
in this way it acts as a Tikhonov regularization in the problem. In
the special case of the squared exponential correlation function, the
nugget mathematically represents the variance of the input values.
Default assumes a nugget close to machine precision for the sake of
robustness (nugget = 10. * MACHINE_EPSILON).
optimizer : string, optional
A string specifying the optimization algorithm to be used.
Default uses 'fmin_cobyla' algorithm from scipy.optimize.
Available optimizers are::
'fmin_cobyla', 'Welch'
'Welch' optimizer is dued to Welch et al., see reference [WBSWM1992]_.
It consists in iterating over several one-dimensional optimizations
instead of running one single multi-dimensional optimization.
random_start : int, optional
The number of times the Maximum Likelihood Estimation should be
performed from a random starting point.
The first MLE always uses the specified starting point (theta0),
the next starting points are picked at random according to an
exponential distribution (log-uniform on [thetaL, thetaU]).
Default does not use random starting point (random_start = 1).
random_state: integer or numpy.RandomState, optional
The generator used to shuffle the sequence of coordinates of theta in
the Welch optimizer. If an integer is given, it fixes the seed.
Defaults to the global numpy random number generator.
Examples
--------
>>> import numpy as np
>>> from sklearn.gaussian_process import GaussianProcess
>>> X = np.array([[1., 3., 5., 6., 7., 8.]]).T
>>> y = (X * np.sin(X)).ravel()
>>> gp = GaussianProcess(theta0=0.1, thetaL=.001, thetaU=1.)
>>> gp.fit(X, y) # doctest: +ELLIPSIS
GaussianProcess(beta0=None...
...
Notes
-----
The presentation implementation is based on a translation of the DACE
Matlab toolbox, see reference [NLNS2002]_.
References
----------
.. [NLNS2002] `H.B. Nielsen, S.N. Lophaven, H. B. Nielsen and J.
Sondergaard. DACE - A MATLAB Kriging Toolbox.` (2002)
http://www2.imm.dtu.dk/~hbn/dace/dace.pdf
.. [WBSWM1992] `W.J. Welch, R.J. Buck, J. Sacks, H.P. Wynn, T.J. Mitchell,
and M.D. Morris (1992). Screening, predicting, and computer
experiments. Technometrics, 34(1) 15--25.`
http://www.jstor.org/pss/1269548
"""
_regression_types = {
'constant': regression.constant,
'linear': regression.linear,
'quadratic': regression.quadratic}
_correlation_types = {
'absolute_exponential': correlation.absolute_exponential,
'squared_exponential': correlation.squared_exponential,
'generalized_exponential': correlation.generalized_exponential,
'cubic': correlation.cubic,
'linear': correlation.linear}
_optimizer_types = [
'fmin_cobyla',
'Welch']
def __init__(self, regr='constant', corr='squared_exponential', beta0=None,
storage_mode='full', verbose=False, theta0=1e-1,
thetaL=None, thetaU=None, optimizer='fmin_cobyla',
random_start=1, normalize=True,
nugget=10. * MACHINE_EPSILON, random_state=None):
self.regr = regr
self.corr = corr
self.beta0 = beta0
self.storage_mode = storage_mode
self.verbose = verbose
self.theta0 = theta0
self.thetaL = thetaL
self.thetaU = thetaU
self.normalize = normalize
self.nugget = nugget
self.optimizer = optimizer
self.random_start = random_start
self.random_state = random_state
# Run input checks
self._check_params()
def fit(self, X, y):
"""
The Gaussian Process model fitting method.
Parameters
----------
X : double array_like
An array with shape (n_samples, n_features) with the input at which
observations were made.
y : double array_like
An array with shape (n_features, ) with the observations of the
scalar output to be predicted.
Returns
-------
gp : self
A fitted Gaussian Process model object awaiting data to perform
predictions.
"""
self.random_state = check_random_state(self.random_state)
# Force data to 2D numpy.array
X = array2d(np.asarray(X))
y = np.asarray(y).ravel()[:, np.newaxis]
# Check shapes of DOE & observations
n_samples_X, n_features = X.shape
n_samples_y = y.shape[0]
if n_samples_X != n_samples_y:
raise ValueError("X and y must have the same number of rows.")
else:
n_samples = n_samples_X
# Run input checks
self._check_params(n_samples)
# Normalize data or don't
if self.normalize:
X_mean = np.mean(X, axis=0)
X_std = np.std(X, axis=0)
y_mean = np.mean(y, axis=0)
y_std = np.std(y, axis=0)
X_std[X_std == 0.] = 1.
y_std[y_std == 0.] = 1.
# center and scale X if necessary
X = (X - X_mean) / X_std
y = (y - y_mean) / y_std
else:
X_mean = np.zeros(1)
X_std = np.ones(1)
y_mean = np.zeros(1)
y_std = np.ones(1)
# Calculate matrix of distances D between samples
D, ij = l1_cross_distances(X)
if np.min(np.sum(D, axis=1)) == 0. \
and self.corr != correlation.pure_nugget:
raise Exception("Multiple input features cannot have the same"
" value")
# Regression matrix and parameters
F = self.regr(X)
n_samples_F = F.shape[0]
if F.ndim > 1:
p = F.shape[1]
else:
p = 1
if n_samples_F != n_samples:
raise Exception("Number of rows in F and X do not match. Most "
+ "likely something is going wrong with the "
+ "regression model.")
if p > n_samples_F:
raise Exception(("Ordinary least squares problem is undetermined "
+ "n_samples=%d must be greater than the "
+ "regression model size p=%d.") % (n_samples, p))
if self.beta0 is not None:
if self.beta0.shape[0] != p:
raise Exception("Shapes of beta0 and F do not match.")
# Set attributes
self.X = X
self.y = y
self.D = D
self.ij = ij
self.F = F
self.X_mean, self.X_std = X_mean, X_std
self.y_mean, self.y_std = y_mean, y_std
# Determine Gaussian Process model parameters
if self.thetaL is not None and self.thetaU is not None:
# Maximum Likelihood Estimation of the parameters
if self.verbose:
print("Performing Maximum Likelihood Estimation of the "
+ "autocorrelation parameters...")
self.theta, self.reduced_likelihood_function_value, par = \
self.arg_max_reduced_likelihood_function()
if np.isinf(self.reduced_likelihood_function_value):
raise Exception("Bad parameter region. "
+ "Try increasing upper bound")
else:
# Given parameters
if self.verbose:
print("Given autocorrelation parameters. "
+ "Computing Gaussian Process model parameters...")
self.theta = self.theta0
self.reduced_likelihood_function_value, par = \
self.reduced_likelihood_function()
if np.isinf(self.reduced_likelihood_function_value):
raise Exception("Bad point. Try increasing theta0.")
self.beta = par['beta']
self.gamma = par['gamma']
self.sigma2 = par['sigma2']
self.C = par['C']
self.Ft = par['Ft']
self.G = par['G']
if self.storage_mode == 'light':
# Delete heavy data (it will be computed again if required)
# (it is required only when MSE is wanted in self.predict)
if self.verbose:
print("Light storage mode specified. "
+ "Flushing autocorrelation matrix...")
self.D = None
self.ij = None
self.F = None
self.C = None
self.Ft = None
self.G = None
return self
def predict(self, X, eval_MSE=False, batch_size=None):
"""
This function evaluates the Gaussian Process model at x.
Parameters
----------
X : array_like
An array with shape (n_eval, n_features) giving the point(s) at
which the prediction(s) should be made.
eval_MSE : boolean, optional
A boolean specifying whether the Mean Squared Error should be
evaluated or not.
Default assumes evalMSE = False and evaluates only the BLUP (mean
prediction).
batch_size : integer, optional
An integer giving the maximum number of points that can be
evaluated simulatneously (depending on the available memory).
Default is None so that all given points are evaluated at the same
time.
Returns
-------
y : array_like
An array with shape (n_eval, ) with the Best Linear Unbiased
Prediction at x.
MSE : array_like, optional (if eval_MSE == True)
An array with shape (n_eval, ) with the Mean Squared Error at x.
"""
# Check input shapes
X = array2d(X)
n_eval, n_features_X = X.shape
n_samples, n_features = self.X.shape
# Run input checks
self._check_params(n_samples)
if n_features_X != n_features:
raise ValueError(("The number of features in X (X.shape[1] = %d) "
+ "should match the sample size used for fit() "
+ "which is %d.") % (n_features_X, n_features))
if batch_size is None:
# No memory management
# (evaluates all given points in a single batch run)
# Normalize input
X = (X - self.X_mean) / self.X_std
# Initialize output
y = np.zeros(n_eval)
if eval_MSE:
MSE = np.zeros(n_eval)
# Get pairwise componentwise L1-distances to the input training set
dx = manhattan_distances(X, Y=self.X, sum_over_features=False)
# Get regression function and correlation
f = self.regr(X)
r = self.corr(self.theta, dx).reshape(n_eval, n_samples)
# Scaled predictor
y_ = np.dot(f, self.beta) + np.dot(r, self.gamma)
# Predictor
y = (self.y_mean + self.y_std * y_).ravel()
# Mean Squared Error
if eval_MSE:
C = self.C
if C is None:
# Light storage mode (need to recompute C, F, Ft and G)
if self.verbose:
print("This GaussianProcess used 'light' storage mode "
+ "at instanciation. Need to recompute "
+ "autocorrelation matrix...")
reduced_likelihood_function_value, par = \
self.reduced_likelihood_function()
self.C = par['C']
self.Ft = par['Ft']
self.G = par['G']
rt = solve_triangular(self.C, r.T, lower=True)
if self.beta0 is None:
# Universal Kriging
u = solve_triangular(self.G.T,
np.dot(self.Ft.T, rt) - f.T)
else:
# Ordinary Kriging
u = np.zeros(y.shape)
MSE = self.sigma2 * (1. - (rt ** 2.).sum(axis=0)
+ (u ** 2.).sum(axis=0))
# Mean Squared Error might be slightly negative depending on
# machine precision: force to zero!
MSE[MSE < 0.] = 0.
return y, MSE
else:
return y
else:
# Memory management
if type(batch_size) is not int or batch_size <= 0:
raise Exception("batch_size must be a positive integer")
if eval_MSE:
y, MSE = np.zeros(n_eval), np.zeros(n_eval)
for k in range(max(1, n_eval / batch_size)):
batch_from = k * batch_size
batch_to = min([(k + 1) * batch_size + 1, n_eval + 1])
y[batch_from:batch_to], MSE[batch_from:batch_to] = \
self.predict(X[batch_from:batch_to],
eval_MSE=eval_MSE, batch_size=None)
return y, MSE
else:
y = np.zeros(n_eval)
for k in range(max(1, n_eval / batch_size)):
batch_from = k * batch_size
batch_to = min([(k + 1) * batch_size + 1, n_eval + 1])
y[batch_from:batch_to] = \
self.predict(X[batch_from:batch_to],
eval_MSE=eval_MSE, batch_size=None)
return y
def reduced_likelihood_function(self, theta=None):
"""
This function determines the BLUP parameters and evaluates the reduced
likelihood function for the given autocorrelation parameters theta.
Maximizing this function wrt the autocorrelation parameters theta is
equivalent to maximizing the likelihood of the assumed joint Gaussian
distribution of the observations y evaluated onto the design of
experiments X.
Parameters
----------
theta : array_like, optional
An array containing the autocorrelation parameters at which the
Gaussian Process model parameters should be determined.
Default uses the built-in autocorrelation parameters
(ie theta = self.theta).
Returns
-------
reduced_likelihood_function_value : double
The value of the reduced likelihood function associated to the
given autocorrelation parameters theta.
par : dict
A dictionary containing the requested Gaussian Process model
parameters:
sigma2
Gaussian Process variance.
beta
Generalized least-squares regression weights for
Universal Kriging or given beta0 for Ordinary
Kriging.
gamma
Gaussian Process weights.
C
Cholesky decomposition of the correlation matrix [R].
Ft
Solution of the linear equation system : [R] x Ft = F
G
QR decomposition of the matrix Ft.
"""
if theta is None:
# Use built-in autocorrelation parameters
theta = self.theta
# Initialize output
reduced_likelihood_function_value = - np.inf
par = {}
# Retrieve data
n_samples = self.X.shape[0]
D = self.D
ij = self.ij
F = self.F
if D is None:
# Light storage mode (need to recompute D, ij and F)
D, ij = l1_cross_distances(self.X)
if np.min(np.sum(D, axis=1)) == 0. \
and self.corr != correlation.pure_nugget:
raise Exception("Multiple X are not allowed")
F = self.regr(self.X)
# Set up R
r = self.corr(theta, D)
R = np.eye(n_samples) * (1. + self.nugget)
R[ij[:, 0], ij[:, 1]] = r
R[ij[:, 1], ij[:, 0]] = r
# Cholesky decomposition of R
try:
C = linalg.cholesky(R, lower=True)
except linalg.LinAlgError:
return reduced_likelihood_function_value, par
# Get generalized least squares solution
Ft = solve_triangular(C, F, lower=True)
try:
Q, G = linalg.qr(Ft, econ=True)
except:
#/usr/lib/python2.6/dist-packages/scipy/linalg/decomp.py:1177:
# DeprecationWarning: qr econ argument will be removed after scipy
# 0.7. The economy transform will then be available through the
# mode='economic' argument.
Q, G = linalg.qr(Ft, mode='economic')
pass
sv = linalg.svd(G, compute_uv=False)
rcondG = sv[-1] / sv[0]
if rcondG < 1e-10:
# Check F
sv = linalg.svd(F, compute_uv=False)
condF = sv[0] / sv[-1]
if condF > 1e15:
raise Exception("F is too ill conditioned. Poor combination "
+ "of regression model and observations.")
else:
# Ft is too ill conditioned, get out (try different theta)
return reduced_likelihood_function_value, par
Yt = solve_triangular(C, self.y, lower=True)
if self.beta0 is None:
# Universal Kriging
beta = solve_triangular(G, np.dot(Q.T, Yt))
else:
# Ordinary Kriging
beta = np.array(self.beta0)
rho = Yt - np.dot(Ft, beta)
sigma2 = (rho ** 2.).sum(axis=0) / n_samples
# The determinant of R is equal to the squared product of the diagonal
# elements of its Cholesky decomposition C
detR = (np.diag(C) ** (2. / n_samples)).prod()
# Compute/Organize output
reduced_likelihood_function_value = - sigma2.sum() * detR
par['sigma2'] = sigma2 * self.y_std ** 2.
par['beta'] = beta
par['gamma'] = solve_triangular(C.T, rho)
par['C'] = C
par['Ft'] = Ft
par['G'] = G
return reduced_likelihood_function_value, par
def arg_max_reduced_likelihood_function(self):
"""
This function estimates the autocorrelation parameters theta as the
maximizer of the reduced likelihood function.
(Minimization of the opposite reduced likelihood function is used for
convenience)
Parameters
----------
self : All parameters are stored in the Gaussian Process model object.
Returns
-------
optimal_theta : array_like
The best set of autocorrelation parameters (the sought maximizer of
the reduced likelihood function).
optimal_reduced_likelihood_function_value : double
The optimal reduced likelihood function value.
optimal_par : dict
The BLUP parameters associated to thetaOpt.
"""
# Initialize output
best_optimal_theta = []
best_optimal_rlf_value = []
best_optimal_par = []
if self.verbose:
print "The chosen optimizer is: " + str(self.optimizer)
if self.random_start > 1:
print str(self.random_start) + " random starts are required."
percent_completed = 0.
# Force optimizer to fmin_cobyla if the model is meant to be isotropic
if self.optimizer == 'Welch' and self.theta0.size == 1:
self.optimizer = 'fmin_cobyla'
if self.optimizer == 'fmin_cobyla':
def minus_reduced_likelihood_function(log10t):
return - self.reduced_likelihood_function(theta=10.
** log10t)[0]
constraints = []
for i in range(self.theta0.size):
constraints.append(lambda log10t: \
log10t[i] - np.log10(self.thetaL[0, i]))
constraints.append(lambda log10t: \
np.log10(self.thetaU[0, i]) - log10t[i])
for k in range(self.random_start):
if k == 0:
# Use specified starting point as first guess
theta0 = self.theta0
else:
# Generate a random starting point log10-uniformly
# distributed between bounds
log10theta0 = np.log10(self.thetaL) \
+ rand(self.theta0.size).reshape(self.theta0.shape) \
* np.log10(self.thetaU / self.thetaL)
theta0 = 10. ** log10theta0
# Run Cobyla
log10_optimal_theta = \
optimize.fmin_cobyla(minus_reduced_likelihood_function,
np.log10(theta0), constraints, iprint=0)
optimal_theta = 10. ** log10_optimal_theta
optimal_minus_rlf_value, optimal_par = \
self.reduced_likelihood_function(theta=optimal_theta)
optimal_rlf_value = - optimal_minus_rlf_value
# Compare the new optimizer to the best previous one
if k > 0:
if optimal_rlf_value > best_optimal_rlf_value:
best_optimal_rlf_value = optimal_rlf_value
best_optimal_par = optimal_par
best_optimal_theta = optimal_theta
else:
best_optimal_rlf_value = optimal_rlf_value
best_optimal_par = optimal_par
best_optimal_theta = optimal_theta
if self.verbose and self.random_start > 1:
if (20 * k) / self.random_start > percent_completed:
percent_completed = (20 * k) / self.random_start
print "%s completed" % (5 * percent_completed)
optimal_rlf_value = best_optimal_rlf_value
optimal_par = best_optimal_par
optimal_theta = best_optimal_theta
elif self.optimizer == 'Welch':
# Backup of the given atrributes
theta0, thetaL, thetaU = self.theta0, self.thetaL, self.thetaU
corr = self.corr
verbose = self.verbose
# This will iterate over fmin_cobyla optimizer
self.optimizer = 'fmin_cobyla'
self.verbose = False
# Initialize under isotropy assumption
if verbose:
print("Initialize under isotropy assumption...")
self.theta0 = array2d(self.theta0.min())
self.thetaL = array2d(self.thetaL.min())
self.thetaU = array2d(self.thetaU.max())
theta_iso, optimal_rlf_value_iso, par_iso = \
self.arg_max_reduced_likelihood_function()
optimal_theta = theta_iso + np.zeros(theta0.shape)
# Iterate over all dimensions of theta allowing for anisotropy
if verbose:
print("Now improving allowing for anisotropy...")
for i in self.random_state.permutation(theta0.size):
if verbose:
print "Proceeding along dimension %d..." % (i + 1)
self.theta0 = array2d(theta_iso)
self.thetaL = array2d(thetaL[0, i])
self.thetaU = array2d(thetaU[0, i])
def corr_cut(t, d):
return corr(array2d(np.hstack([
optimal_theta[0][0:i],
t[0],
optimal_theta[0][(i + 1)::]])), d)
self.corr = corr_cut
optimal_theta[0, i], optimal_rlf_value, optimal_par = \
self.arg_max_reduced_likelihood_function()
# Restore the given atrributes
self.theta0, self.thetaL, self.thetaU = theta0, thetaL, thetaU
self.corr = corr
self.optimizer = 'Welch'
self.verbose = verbose
else:
raise NotImplementedError(("This optimizer ('%s') is not "
+ "implemented yet. Please contribute!")
% self.optimizer)
return optimal_theta, optimal_rlf_value, optimal_par
def _check_params(self, n_samples=None):
# Check regression model
if not callable(self.regr):
if self.regr in self._regression_types:
self.regr = self._regression_types[self.regr]
else:
raise ValueError(("regr should be one of %s or callable, "
+ "%s was given.")
% (self._regression_types.keys(), self.regr))
# Check regression weights if given (Ordinary Kriging)
if self.beta0 is not None:
self.beta0 = array2d(self.beta0)
if self.beta0.shape[1] != 1:
# Force to column vector
self.beta0 = self.beta0.T
# Check correlation model
if not callable(self.corr):
if self.corr in self._correlation_types:
self.corr = self._correlation_types[self.corr]
else:
raise ValueError(("corr should be one of %s or callable, "
+ "%s was given.")
% (self._correlation_types.keys(), self.corr))
# Check storage mode
if self.storage_mode != 'full' and self.storage_mode != 'light':
raise ValueError("Storage mode should either be 'full' or "
+ "'light', %s was given." % self.storage_mode)
# Check correlation parameters
self.theta0 = array2d(self.theta0)
lth = self.theta0.size
if self.thetaL is not None and self.thetaU is not None:
self.thetaL = array2d(self.thetaL)
self.thetaU = array2d(self.thetaU)
if self.thetaL.size != lth or self.thetaU.size != lth:
raise ValueError("theta0, thetaL and thetaU must have the "
+ "same length.")
if np.any(self.thetaL <= 0) or np.any(self.thetaU < self.thetaL):
raise ValueError("The bounds must satisfy O < thetaL <= "
+ "thetaU.")
elif self.thetaL is None and self.thetaU is None:
if np.any(self.theta0 <= 0):
raise ValueError("theta0 must be strictly positive.")
elif self.thetaL is None or self.thetaU is None:
raise ValueError("thetaL and thetaU should either be both or "
+ "neither specified.")
# Force verbose type to bool
self.verbose = bool(self.verbose)
# Force normalize type to bool
self.normalize = bool(self.normalize)
# Check nugget value
self.nugget = np.asarray(self.nugget)
if np.any(self.nugget) < 0.:
raise ValueError("nugget must be positive or zero.")
if (n_samples is not None
and self.nugget.shape not in [(), (n_samples,)]):
raise ValueError("nugget must be either a scalar "
"or array of length n_samples.")
# Check optimizer
if not self.optimizer in self._optimizer_types:
raise ValueError("optimizer should be one of %s"
% self._optimizer_types)
# Force random_start type to int
self.random_start = int(self.random_start)
|