1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143
|
"""
Estimate a GEV on the Venice sea-levels data
============================================
"""
# %%
# In this example, we illustrate various techniques of extreme value modeling applied
# to the annual maximum sea-levels recorded in Venice over the period 1931-1981.
# Readers should refer to [coles2001]_ to get more details.
#
# We illustrate techniques to:
#
# - estimate a stationary GEV, using the :math:`r`-largest annual sea-levels for :math:`r\geq 1`,
#
# using:
#
# - the log-likelihood function,
# - the profile log-likelihood function.
#
# First, we load the 10 largest annual sea-levels in Venice. We start by looking at them
# through time. Note that for the year 1935, only the largest 6 observations are available.
# For simplicity of the example, we removed that year from the data but it could be
# used for all the analyses beased on the largest :math:`r` annual sea-levels for :math:`r \leq 6`.
import openturns as ot
import openturns.viewer as otv
from openturns.usecases import coles
data = coles.Coles().venice
# %%
# The column :math:`i` contains the largest :math:`i` annual sea-levels.
print(data[:5])
graph = ot.Graph("Annual maximum sea-levels in Venice", "year", "level (cm)", True, "")
for r in range(10):
cloud = ot.Cloud(data[:, [0, 1 + r]])
graph.add(cloud)
graph.setIntegerXTick(True)
view = otv.View(graph)
# %%
# **Stationary GEV modeling from the annual maximum sea-levels**
#
# We first assume that the dependence through time is negligible, so we first model the data as
# independent observations over the observation period. We estimate the parameters of the
# GEV distribution by maximizing the log-likelihood of the data.
# We select the first column of the data.
sample = data[:, 1]
# %%
# Estimate the parameters of the GEV by maximizing the log-likehood
# and compute the parameter distribution
factory = ot.GeneralizedExtremeValueFactory()
result_LL_max = factory.buildMethodOfLikelihoodMaximizationEstimator(sample)
# %%
# We get the fitted GEV and its parameters of :math:`(\hat{\mu}, \hat{\sigma}, \hat{\xi})`.
fitted_GEV = result_LL_max.getDistribution()
desc = fitted_GEV.getParameterDescription()
param = fitted_GEV.getParameter()
print(", ".join([f"{p}: {value:.3f}" for p, value in zip(desc, param)]))
print("Max log-likelihood (one max): ", result_LL_max.getLogLikelihood())
# %%
# We get the asymptotic distribution of the estimator :math:`(\hat{\mu}, \hat{\sigma}, \hat{\xi})`.
# In that case, the asymptotic distribution is normal.
parameterEstimate = result_LL_max.getParameterDistribution()
print("Asymptotic distribution of the estimator : ")
print(parameterEstimate)
# %%
# We get the covariance matrix and the standard deviation of :math:`(\hat{\mu}, \hat{\sigma}, \hat{\xi})`.
print("Cov matrix = ", parameterEstimate.getCovariance())
print("Standard dev = ", parameterEstimate.getStandardDeviation())
# %%
# We get the marginal confidence intervals of order 0.95.
order = 0.95
for i in range(3):
ci = parameterEstimate.getMarginal(i).computeBilateralConfidenceInterval(order)
print(desc[i] + ":", ci)
# %%
# At last, we can validate the inference result thanks the 4 usual diagnostic plots:
#
# - the probability-probability pot,
# - the quantile-quantile pot,
# - the return level plot,
# - the empirical distribution function.
validation = ot.GeneralizedExtremeValueValidation(result_LL_max, sample)
graph = validation.drawDiagnosticPlot()
view = otv.View(graph)
# %%
# We can also use the profile log-likehood function rather than log-likehood function to estimate the parameters of the GEV.
result_PLL_max = factory.buildMethodOfXiProfileLikelihoodEstimator(sample)
# %%
# The following graph allows one to get the profile log-likelihood plot.
# It also indicates the optimal value of :math:`\xi`, the maximum profile log-likelihood and
# the confidence interval for :math:`\xi` of order 0.95 (which is the default value).
order = 0.95
result_PLL_max.setConfidenceLevel(order)
print(result_PLL_max.getParameterConfidenceInterval())
# %%
# We can get the numerical values of the confidence interval: it appears to be a bit smaller
# with the interval obtained from the profile log-likelihood function than with the log-likelihood function.
# Note that if the order requested is too high, the confidence interval might not be calculated because
# one of its bound is out of the definition domain of the log-likelihood function.
try:
print(
"Confidence interval for xi = ", result_PLL_max.getParameterConfidenceInterval()
)
except Exception as ex:
print(type(ex))
pass
# %%
# **Stationary GEV modeling from the largest** :math:`r` **annual sea-levels**
#
# We still assume that the dependence through time is negligible. We estimate the parameters of the
# GEV distribution by maximizing the log-likelihood of the data. Now, we want to model more of the
# observed extremes than the annual maxima: the additional information contained in the largest
# :math:`10` observations can be used to improve the estimation of the GEV model.
#
# Now, we drop the year column to keep only the maxima values.
sample_rmax = data[:, 1:]
print(sample_rmax[:5])
# %%
# We estimate the parameters from the largest :math:`r` annual sea-levels for :math:`r=1,5,10`.
# For each :math:`r` value, we get the estimated parameters.
factory = ot.GeneralizedExtremeValueFactory()
r_candidate = [1, 5, 10]
for r in r_candidate:
estimate = factory.buildMethodOfLikelihoodMaximization(sample_rmax, r)
desc = estimate.getParameterDescription()
p = estimate.getParameter()
pretty_p = ", ".join([f"{param}: {value:.3f}" for param, value in zip(desc, p)])
print(f"r={r:2} {pretty_p}")
# %%
otv.View.ShowAll()
|