File: gp.py

import numpy as np
from scipy.optimize import minimize
from scipy.linalg import solve_triangular, cho_factor, cho_solve
from ase.optimize.gpmin.kernel import SquaredExponential
from ase.optimize.gpmin.prior import ZeroPrior


class GaussianProcess():
    """Gaussian Process Regression
    It is recommended to be used with other Priors and Kernels from
    ase.optimize.gpmin

    Parameters:

    prior: Prior class, as in ase.optimize.gpmin.prior
           Defaults to ZeroPrior

    kernel: Kernel function for the regression, as in
            ase.optimize.gpmin.kernel
            Defaults to the Squared Exponential kernel with derivatives
    """
    def __init__(self, prior=None, kernel=None):
        if kernel is None:
            self.kernel = SquaredExponential()
        else:
            self.kernel = kernel

        if prior is None:
            self.prior = ZeroPrior()
        else:
            self.prior = prior

    def set_hyperparams(self, params):
        """Set hyperparameters of the regression.
        This is a list containing the parameters of the
        kernel and the regularization (noise)
        of the method as the last entry.
        """
        self.hyperparams = params
        self.kernel.set_params(params[:-1])
        self.noise = params[-1]
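
    # Illustrative note (hypothetical usage, values made up): with the default
    # SquaredExponential kernel the hyperparameter array is expected to be
    # [weight, scale, noise], with the noise always last, e.g.
    #
    #     gp = GaussianProcess()
    #     gp.set_hyperparams(np.array([1.0, 0.4, 1e-3]))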

    def train(self, X, Y, noise=None):
        """Produces a PES model from data.

        Given a set of observations X, Y, compute the kernel matrix K
        (and its Cholesky factorization).
        This method should be executed whenever more data is added.

        Parameters:

        X: observations (i.e. positions). numpy array with shape: nsamples x D
        Y: targets (i.e. energy and forces). numpy array with
            shape (nsamples, D+1)
        noise: Noise parameter, in case it needs to be restated.
        """
        if noise is not None:
            self.noise = noise  # Set noise attribute to a different value

        self.X = X.copy()  # Store the data in an attribute
        n = self.X.shape[0]
        D = self.X.shape[1]
        regularization = np.array(n * ([self.noise * self.kernel.l] +
                                  D * [self.noise]))

        K = self.kernel.kernel_matrix(X)  # Compute the kernel matrix
        K[range(K.shape[0]), range(K.shape[0])] += regularization**2
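        # The squared regularization terms act as added noise variance on the
        # diagonal of K: for each sample, the energy entry receives
        # (noise * l)**2 and each of the D gradient entries receives noise**2.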

        self.m = self.prior.prior(X)
        self.a = Y.flatten() - self.m
        self.L, self.lower = cho_factor(K, lower=True, check_finite=True)
        cho_solve((self.L, self.lower), self.a, overwrite_b=True,
                  check_finite=True)
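
    # Illustrative sketch (hypothetical usage): the arrays below are made-up
    # placeholders that only show the expected shapes, with each row of Y
    # holding the energy followed by the D gradient components (see the
    # runnable example at the bottom of the file):
    #
    #     gp = GaussianProcess()
    #     gp.set_hyperparams(np.array([1.0, 0.4, 1e-3]))  # [weight, scale, noise]
    #     X = np.zeros((5, 6))    # 5 samples of D = 6 coordinates
    #     Y = np.zeros((5, 7))    # energy + 6 gradient components per sample
    #     gp.train(X, Y)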

    def predict(self, x, get_variance=False):
        """Given a trained Gaussian Process, it predicts the value and the
        uncertainty at point x. It returns f and V:
        f : prediction: [y, grady]
        V : Covariance matrix. Its diagonal is the variance of each component
            of f.

        Parameters:

        x (1D np.array):      The position at which the prediction is computed
        get_variance (bool):  if False, only the prediction f is returned;
                              if True, the prediction f and the variance V are
                              returned. Note that V is O(D * nsamples**2).
        """
        n = self.X.shape[0]
        k = self.kernel.kernel_vector(x, self.X, n)
        f = self.prior.prior(x) + np.dot(k, self.a)
        if get_variance:
            v = solve_triangular(self.L, k.T.copy(), lower=True,
                                 check_finite=False)
            variance = self.kernel.kernel(x, x)
            # covariance = np.matmul(v.T, v)
            covariance = np.tensordot(v, v, axes=(0, 0))
            V = variance - covariance
            return f, V
        return f
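
    # Illustrative sketch (hypothetical usage), assuming a model trained on
    # D = 6 coordinates as in the sketch above:
    #
    #     x = np.zeros(6)                    # a single D-dimensional point
    #     f = gp.predict(x)                  # f = [energy, gradient], shape (D + 1,)
    #     f, V = gp.predict(x, get_variance=True)
    #     sigma_energy = np.sqrt(V[0, 0])    # standard deviation of the energy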

    def neg_log_likelihood(self, params, *args):
        """Negative logarithm of the marginal likelihood and its derivative.
        It has been built in the form that suits the best its optimization,
        with the scipy minimize module, to find the optimal hyperparameters.

        Parameters:

        l: The scale for which we compute the marginal likelihood
        *args: Should be a tuple containing the inputs and targets
               in the training set-
        """
        X, Y = args
        # Come back to this
        self.kernel.set_params(np.array([params[0], params[1], self.noise]))
        self.train(X, Y)
        y = Y.flatten()

        # Compute log likelihood
        logP = (-0.5 * np.dot(y - self.m, self.a) -
                np.sum(np.log(np.diag(self.L))) -
                X.shape[0] * 0.5 * np.log(2 * np.pi))
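        # Note: after train(), self.a holds K^-1 (y - m) (cho_solve overwrites
        # it in place), so the first term above is -0.5 (y - m)^T K^-1 (y - m).
        # The second term uses the Cholesky factor: K = L L^T implies
        # log|K| = 2 * sum(log(diag(L))), hence -sum(log(diag(L))) = -0.5 log|K|.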

        # Gradient of the loglikelihood
        grad = self.kernel.gradient(X)

        # vectorizing the derivative of the log likelihood
        D_P_input = np.array([np.dot(np.outer(self.a, self.a), g)
                              for g in grad])
        D_complexity = np.array([cho_solve((self.L, self.lower), g)
                                 for g in grad])

        DlogP = 0.5 * np.trace(D_P_input - D_complexity, axis1=1, axis2=2)
        return -logP, -DlogP

    def fit_hyperparameters(self, X, Y, tol=1e-2, eps=None):
        """Given a set of observations, X, Y; optimize the scale
        of the Gaussian Process maximizing the marginal log-likelihood.
        This method calls TRAIN there is no need to call the TRAIN method
        again. The method also sets the parameters of the Kernel to their
        optimal value at the end of execution

        Parameters:

        X:   observations (i.e. positions). numpy array with shape: nsamples x D
        Y:   targets (i.e. energy and forces).
             numpy array with shape (nsamples, D+1)
        tol: tolerance on the maximum component of the gradient of the
             log-likelihood.
             (See scipy's L-BFGS-B documentation:
             https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.minimize.html)
        eps: if not None, bound each hyperparameter to within +/- eps
             (as a fraction) of its current value; if eps is None the
             optimization is unbounded.

        Returns:

        result (dict) :
              result = {'hyperparameters': (numpy.array) New hyperparameters,
                        'converged': (bool) True if it converged,
                                            False otherwise
                       }
        """
        params = np.copy(self.hyperparams)[:2]
        arguments = (X, Y)
        if eps is not None:
            bounds = [((1 - eps) * p, (1 + eps) * p) for p in params]
        else:
            bounds = None

        result = minimize(self.neg_log_likelihood, params, args=arguments,
                          method='L-BFGS-B', jac=True, bounds=bounds,
                          options={'gtol': tol, 'ftol': 0.01 * tol})

        if not result.success:
            converged = False
        else:
            converged = True
            self.hyperparams = np.array([result.x.copy()[0],
                                         result.x.copy()[1], self.noise])
        self.set_hyperparams(self.hyperparams)
        return {'hyperparameters': self.hyperparams, 'converged': converged}
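

# A minimal, self-contained usage sketch (hypothetical; all values are made
# up for illustration). It fits the GP to synthetic data from the quadratic
# potential E(x) = |x|**2 / 2, whose gradient is x, using arbitrary starting
# hyperparameters [weight, scale, noise].
if __name__ == '__main__':
    rng = np.random.RandomState(0)
    D = 4                                   # dimension of the toy problem
    X = rng.uniform(-1.0, 1.0, (6, D))      # 6 observed positions
    energies = 0.5 * np.sum(X**2, axis=1)   # E(x) = |x|**2 / 2
    gradients = X                           # dE/dx = x
    Y = np.hstack([energies[:, None], gradients])

    gp = GaussianProcess()
    gp.set_hyperparams(np.array([1.0, 0.5, 1e-3]))  # [weight, scale, noise]
    gp.train(X, Y)

    # Optionally refine weight and scale by maximizing the marginal
    # log-likelihood (convergence is not guaranteed for such a small set).
    result = gp.fit_hyperparameters(X, Y, eps=0.5)
    print('converged:', result['converged'])
    print('hyperparameters:', result['hyperparameters'])

    # Predict energy, gradient and uncertainty at a new point.
    x = np.zeros(D)
    f, V = gp.predict(x, get_variance=True)
    print('predicted energy:', f[0])
    print('energy standard deviation:', np.sqrt(V[0, 0]))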