1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178
|
"""
Gaussian Process Regression : cantilever beam model
===================================================
"""
# %%
# In this example, we create a Gaussian Process Regression (GPR) metamodel of the :ref:`cantilever beam <use-case-cantilever-beam>`.
# We use a squared exponential covariance kernel for the Gaussian process. In order to estimate the hyper-parameters, we use a design of experiments of size 20.
# %%
# Definition of the model
# -----------------------
# %%
from openturns.usecases import cantilever_beam
import openturns as ot
import openturns.experimental as otexp
import openturns.viewer as viewer
from matplotlib import pylab as plt
ot.Log.Show(ot.Log.NONE)
# Reset default settings
ot.ResourceMap.Reload()
ot.RandomGenerator.SetSeed(0)
# %%
# We load the cantilever beam use case :
cb = cantilever_beam.CantileverBeam()
# %%
# We define the function which evaluates the output depending on the inputs.
model = cb.model
# %%
# Then we define the distribution of the input random vector.
myDistribution = cb.distribution
# %%
# Create the design of experiments
# --------------------------------
# %%
# We consider a simple Monte-Carlo sample as a design of experiments.
# This is why we generate an input sample using the `getSample` method of the distribution. Then we evaluate the output using the `model` function.
# %%
sampleSize_train = 20
X_train = myDistribution.getSample(sampleSize_train)
Y_train = model(X_train)
# %%
# The following figure presents the distribution of the vertical deviations Y on the training sample. We observe that the large deviations occur less often.
# %%
histo = ot.HistogramFactory().build(Y_train).drawPDF()
histo.setXTitle("Vertical deviation (cm)")
histo.setTitle("Distribution of the vertical deviation")
histo.setLegends([""])
view = viewer.View(histo)
# %%
# Create the metamodel
# --------------------
# %%
# In order to create the GPR metamodel, we first select a constant trend with the `ConstantBasisFactory` class. Then we use a squared exponential covariance kernel.
# The `SquaredExponential` kernel has one amplitude coefficient and 4 scale coefficients.
# This is because this covariance kernel is anisotropic : each of the 4 input variables is associated with its own scale coefficient.
# %%
basis = ot.ConstantBasisFactory(cb.dim).build()
covarianceModel = ot.SquaredExponential(cb.dim)
# %%
# Typically, the optimization algorithm is quite good at setting sensible optimization bounds.
# In this case, however, the range of the input domain is extreme.
# %%
print("Lower and upper bounds of X_train:")
print(X_train.getMin(), X_train.getMax())
# %%
# We need to manually define sensible optimization bounds.
# Note that since the amplitude parameter is computed analytically (this is possible when the output dimension is 1), we only need to set bounds on the scale parameter.
# %%
scaleOptimizationBounds = ot.Interval(
[1.0, 1.0, 1.0, 1.0e-10], [1.0e11, 1.0e3, 1.0e1, 1.0e-5]
)
# %%
# Finally, we use the `GaussianProcessFitter` and `GaussianProcessRegression` classes to create the GPR metamodel.
# It requires a training sample, a covariance kernel and a trend basis as input arguments.
# We need to set the initial scale parameter for the optimization. The upper bound of the input domain is a sensible choice here.
# We must not forget to actually set the optimization bounds defined above.
# %%
covarianceModel.setScale(X_train.getMax())
fitter_algo = otexp.GaussianProcessFitter(X_train, Y_train, covarianceModel, basis)
fitter_algo.setOptimizationBounds(scaleOptimizationBounds)
# %%
# The `run` method optimizes the metamodel hyperparameters.
#
# We can then print the constant trend of the metamodel, estimated using the least squares method.
# %%
fitter_algo.run()
fitter_result = fitter_algo.getResult()
gpr_algo = otexp.GaussianProcessRegression(fitter_result)
gpr_algo.run()
gpr_result = gpr_algo.getResult()
gprMetamodel = gpr_result.getMetaModel()
# %%
# The `getTrendCoefficients` method returns the coefficients of the trend.
# %%
print(gpr_result.getTrendCoefficients())
# %%
# We can also print the hyperparameters of the covariance model, which have been estimated by maximizing the likelihood.
# %%
gpr_result.getCovarianceModel()
# %%
# Validate the metamodel
# ----------------------
# %%
# We finally want to validate the GPR metamodel. This is why we generate a validation sample with size 100 and we evaluate the output of the model on this sample.
# %%
sampleSize_test = 100
X_test = myDistribution.getSample(sampleSize_test)
Y_test = model(X_test)
# %%
# The `MetaModelValidation` classe makes the validation easy. To create it, we use the validation samples and the metamodel.
# %%
val = ot.MetaModelValidation(Y_test, gprMetamodel(X_test))
# %%
# The `computeR2Score` computes the R2 score.
# %%
R2 = val.computeR2Score()[0]
print(R2)
# %%
# The residuals are the difference between the model and the metamodel.
# %%
r = val.getResidualSample()
graph = ot.HistogramFactory().build(r).drawPDF()
graph.setXTitle("Residuals (cm)")
graph.setTitle("Distribution of the residuals")
graph.setLegends([""])
view = viewer.View(graph)
# %%
# We observe that the negative residuals occur with nearly the same frequency of the positive residuals: this is a first sign of good quality.
# %%
# The `drawValidation` method allows one to compare the observed outputs and the metamodel outputs.
# %%
# sphinx_gallery_thumbnail_number = 3
graph = val.drawValidation()
graph.setTitle("R2 = %.2f%%" % (100 * R2))
view = viewer.View(graph)
plt.show()
|