"""
Sampling from an unscaled probability density
=============================================
"""
# %%
# In this example, we show different ways to sample a distribution whose PDF is known up to a normalization constant:
#
# - Case 1: computing the normalization factor and using a :class:`~openturns.PythonDistribution`,
# - Case 2: using the Ratio of Uniforms algorithm with the class :class:`~openturns.experimental.RatioOfUniforms`
# - Case 3: using some Metropolis-Hastings algorithms with the class :class:`~openturns.IndependentMetropolisHastings`
# and :class:`~openturns.RandomWalkMetropolisHastings`.
#
# Consider a distribution whose
# probability density function :math:`p` is known up to the normalization factor :math:`c`:
# let :math:`f` be a function such that
# :math:`p = cf` with :math:`c \in \Rset^+_*`.
#
# We illustrate the case with:
#
# .. math::
#
#    f(x) = \frac{1}{2} (2 + \sin(x)^2) \exp \left[- \left(2 + \cos(3x)^3 + \sin(2x)^3 \right) x
#    \right] \mathbf{1}_{[0, 2 \pi]}(x).
#
# First, we draw the unscaled probability density function.
import openturns as ot
import openturns.experimental as otexp
import openturns.viewer as otv
from math import pi
from time import monotonic
# Fix the seed so the example is reproducible.
ot.RandomGenerator.SetSeed(1)

# Support of the target density: [0, 2*pi].
lower_bound = 0.0
upper_bound = 2.0 * pi
range_PDF = ot.Interval(lower_bound, upper_bound)

# Unscaled density f (known up to the normalization constant c).
f = ot.SymbolicFunction(
    "x", "0.5 * (2 + sin(x)^2) * exp( -( 2 + cos(3*x)^3 + sin(2*x)^3) * x )"
)

# Draw f over its support.
graph = f.draw(lower_bound, upper_bound, 512)
graph.setTitle(
    r"Christian Robert function: $f(x) = 0.5(2 + sin^2 x) e^{ -x( 2 + cos^33x + sin^3 2x)}, \quad x \in [0, 2\pi]$"
)
graph.setYTitle(r"$f(x)$")
graph.setXTitle("x")
view = otv.View(graph)
# %%
# Case 1: Computation of the normalization factor
# -----------------------------------------------
# The best thing to do is to compute the normalization factor thanks to the integration algorithms of the library.
#
# We show how to compute the normalization factor using a :class:`~openturns.GaussKronrod` quadrature formula.
# Integral of f over its support, computed with a Gauss-Kronrod rule: this is 1/c.
integral_of_f = ot.GaussKronrod().integrate(f, range_PDF)[0]
norm_fact = 1.0 / integral_of_f
print("normalization factor = ", norm_fact)

# %%
# Thus, we can define the exact PDF expression:
#
# .. math::
#
#    p(x) = \dfrac{f(x)}{\int_0^{2\pi} f(u)\, du}
#
exact_PDF = ot.LinearCombinationFunction([f], [norm_fact])
# %%
# Then we define a :class:`~openturns.PythonDistribution` which:
#
# - *computePDF* is computed from the exact expression,
# - *computeCDF* is computed using an integration algorithm on the *computePDF*.
#
# Doing that way, we use the generic sampler of the distribution, based on the CDF inversion method,
# implemented in the *getSample* method.
class NewDistribution(ot.PythonDistribution):
    """Scalar distribution with exact PDF p = c*f on [0, 2*pi].

    Sampling relies on the generic CDF-inversion sampler of the library,
    since neither getRealization nor getSample is overridden here.
    """

    def __init__(self):
        super().__init__(1)
        # Normalized PDF and its logarithm, both as OpenTURNS functions.
        self.pdf_ = exact_PDF
        self.log_pdf_ = ot.ComposedFunction(
            ot.SymbolicFunction("x", "log(x)"), self.pdf_
        )
        # Bounds of the support.
        self.lower_ = 0.0
        self.upper_ = 2.0 * pi

    def getRange(self):
        # Support of the distribution: [0, 2*pi].
        return ot.Interval(self.lower_, self.upper_)

    def computeLogPDF(self, X):
        # Outside the support the log-PDF is -infinity.
        if X[0] < self.lower_ or self.upper_ <= X[0]:
            return -ot.SpecFunc.Infinity
        return self.log_pdf_(X)[0]

    def computePDF(self, X):
        # Outside the support the PDF is zero.
        if X[0] < self.lower_ or self.upper_ <= X[0]:
            return 0.0
        return self.pdf_(X)[0]

    def computeCDF(self, X):
        if X[0] < self.lower_:
            return 0.0
        if X[0] >= self.upper_:
            return 1.0
        # CDF obtained by Gauss-Legendre integration of the PDF up to X[0].
        return ot.GaussLegendre([64]).integrate(
            self.pdf_, ot.Interval(self.lower_, X[0])
        )[0]
# %%
# We create the new distribution that uses the generic sampler:
newDist_generic = ot.Distribution(NewDistribution())

# %%
# We can draw the exact PDF and the PDF estimated from a sample generated by
# the generic (CDF-inversion) sampler.
size = 500
sample = newDist_generic.getSample(size)

# Kernel smoothing with boundary correction over the support.
ks_algo = ot.KernelSmoothing()
ks_algo.setBoundaryCorrection(True)
ks_algo.setLowerBound(lower_bound)
ks_algo.setUpperBound(upper_bound)
ks_pdf_GS = ks_algo.build(sample)

g = ks_pdf_GS.drawPDF(-0.5, upper_bound * 1.1, 1001)
# Thicker curve for the exact PDF; this drawable is reused in later graphs.
draw_exact = exact_PDF.draw(lower_bound, upper_bound, 1001).getDrawable(0)
draw_exact.setLineWidth(2)
g.add(draw_exact)
g.setLegends([f"GS size = {size}", "exact pdf"])
g.setLegendPosition("topright")
g.setTitle("Generic sampler (GS)")
g.setXTitle("x")
view = otv.View(g)
# %%
# We can change the sampler of that new distribution by implementing the method *getSample* and *getRealization* as follows:
class NewDistribution_RoU(ot.PythonDistribution):
    """Same distribution as NewDistribution, but sampled with Ratio of Uniforms.

    getRealization and getSample delegate to a RatioOfUniforms sampler built
    from the log-PDF, which is much faster than the generic CDF inversion.
    """

    def __init__(self):
        super().__init__(1)
        # Normalized PDF and its logarithm.
        self.pdf_ = exact_PDF
        self.log_pdf_ = ot.ComposedFunction(
            ot.SymbolicFunction("x", "log(x)"), self.pdf_
        )
        # Bounds of the support [0, 2*pi].
        self.lower_ = 0.0
        self.upper_ = 2.0 * pi
        # Dedicated sampler used by getRealization / getSample.
        self.sampler_ = otexp.RatioOfUniforms(self.log_pdf_, self.getRange())

    def getRange(self):
        return ot.Interval(self.lower_, self.upper_)

    def computeLogPDF(self, X):
        # Outside the support the log-PDF is -infinity.
        if X[0] < self.lower_ or self.upper_ <= X[0]:
            return -ot.SpecFunc.Infinity
        return self.log_pdf_(X)[0]

    def computePDF(self, X):
        # Outside the support the PDF is zero.
        if X[0] < self.lower_ or self.upper_ <= X[0]:
            return 0.0
        return self.pdf_(X)[0]

    def computeCDF(self, X):
        if X[0] < self.lower_:
            return 0.0
        if X[0] >= self.upper_:
            return 1.0
        # CDF obtained by Gauss-Legendre integration of the PDF up to X[0].
        return ot.GaussLegendre([32]).integrate(
            self.pdf_, ot.Interval(self.lower_, X[0])
        )[0]

    def getRealization(self):
        # Delegate to the Ratio of Uniforms sampler.
        return self.sampler_.getRealization()

    def getSample(self, n):
        # Delegate to the Ratio of Uniforms sampler.
        return self.sampler_.getSample(n)
# %%
# We create the new distribution that uses the Ratio of Uniforms sampler:
newDist_RoU = ot.Distribution(NewDistribution_RoU())

# %%
# We compare the sampling speed of the distribution with the Ratio of Uniforms algorithm to the generic sampling speed.
# The Ratio of Uniforms algorithm proves to be much quicker than the generic method.
sizeRoU = 10000
sizeGeneric = 100

# Time both samplers with a monotonic clock (immune to system clock changes).
tic = monotonic()
sample_RoU = newDist_RoU.getSample(sizeRoU)
tac = monotonic()
sample_generic = newDist_generic.getSample(sizeGeneric)
toc = monotonic()

# Throughput in realizations per second for each sampler.
speed_rou = sizeRoU / (tac - tic)
speed_generic = sizeGeneric / (toc - tac)
print(f"Is Ratio of Uniforms faster ? {speed_rou > 10 * speed_generic}")
# %%
# Case 2: Direct use of the Ratio of Uniforms algorithm
# -----------------------------------------------------
# In that case, we want to use the :class:`~openturns.experimental.RatioOfUniforms` algorithm to
# sample :math:`p`. We need to compute the function :math:`\log f` and its range.
#
# We create the function :math:`\log f` and the :class:`~openturns.experimental.RatioOfUniforms`:
# log(f): the Ratio of Uniforms algorithm works on the log of the unscaled PDF.
log_unscaled_pdf = ot.ComposedFunction(ot.SymbolicFunction("x", "log(x)"), f)
ratio_algo = otexp.RatioOfUniforms(log_unscaled_pdf, range_PDF)

# %%
# We can draw the exact PDF and the PDF estimated from a sample generated by the Ratio of Uniforms algorithm.
size = 100000
sample = ratio_algo.getSample(size)

# Kernel-smoothed estimate of the sampled density, with boundary correction.
ks_algo = ot.KernelSmoothing()
ks_algo.setBoundaryCorrection(True)
ks_algo.setLowerBound(lower_bound)
ks_algo.setUpperBound(upper_bound)
ks_pdf = ks_algo.build(sample)

g = ks_pdf.drawPDF(-0.5, upper_bound * 1.1, 1001)
g.add(draw_exact)
g.setLegends([f"RoU size = {size}", "exact PDF"])
g.setLegendPosition("topright")
g.setTitle("Ratio of Uniforms (RoU) ")
g.setXTitle("x")
view = otv.View(g)
# %%
# In order to facilitate the comparison with the use of Metropolis-Hastings
# based algorithms, we generate
# a sample of the same size and we draw the estimated PDF.
# Smaller sample, same size as the Metropolis-Hastings runs below.
size2 = 500
sample = ratio_algo.getSample(size2)
ks_algo = ot.KernelSmoothing()
ks_algo.setBoundaryCorrection(True)
ks_algo.setLowerBound(lower_bound)
ks_algo.setUpperBound(upper_bound)
ks_pdf_RoU = ks_algo.build(sample)

# Overlay the small-sample estimate (thicker line) on the previous graph.
draw_RoU = ks_pdf_RoU.drawPDF(-0.5, upper_bound * 1.1, 1001).getDrawable(0)
draw_RoU.setLineWidth(2)
g.add(draw_RoU)
g.setLegends([f"RoU size = {size}", "exact PDF", f"RoU (size = {size2})"])
g.setTitle("Ratio of Uniforms")
view = otv.View(g)
# %%
# By default, the parameter :math:`r=1`. We can get the associated acceptance ratio:
print("Acceptance ratio = ", ratio_algo.getAcceptanceRatio())
# %%
# We can estimate the normalization factor with the Ratio of Uniforms algorithm (see
# the documenttaion of :class:`~openturns.experimental.RatioOfUniforms`):
print("Normalization factor estimate = ", ratio_algo.getC())
# %%
# We can change the :math:`r` parameter and check the associated acceptance ratio:
r_new = 0.5
ratio_algo.setR(r_new)
print("New acceptance ratio = ", ratio_algo.getAcceptanceRatio())
# %%
# Case 3(a): Use of Independent Metropolis-Hastings
# -------------------------------------------------
# Let us use a mixture distribution to approximate the target distribution.
#
# This approximation will serve as the instrumental distribution
# in the independent Metropolis-Hastings algorithm.
# Instrumental distribution for independent Metropolis-Hastings: a mixture of
# an Exponential bulk near 0 and a Normal bump around 5.3 that mimics the
# target's shape. (The second component was misleadingly named "unif" before:
# it is a Normal, not a Uniform.)
expo_component = ot.Exponential(1.0)
normal_component = ot.Normal(5.3, 0.4)
instrumental_dist = ot.Mixture([expo_component, normal_component], [0.9, 0.1])
# %%
# Compare the instrumental density to the exact PDF.
graph = instrumental_dist.drawPDF(lower_bound, upper_bound, 512)
graph.add(draw_exact)
graph.setLegends(["Instrumental PDF", "exact PDF"])
# Use "topright" like every other graph in this example (was "upper right").
graph.setLegendPosition("topright")
graph.setTitle("Independent Metropolis-Hastings: Instrumental PDF")
graph.setXTitle("x")
view = otv.View(graph)
# %%
# :class:`~openturns.MetropolisHastings` and derived classes can work directly with the logarithm of the unscaled
# target density.
# Log of the unscaled target density: Metropolis-Hastings only needs the
# target up to a constant, so log(f) is enough.
log_fun = ot.SymbolicFunction("x", "log(x)")
log_density = ot.ComposedFunction(log_fun, f)
initial_State = ot.Point([3.0])  # starting point; irrelevant for an independent sampler
independent_IMH = ot.IndependentMetropolisHastings(
    log_density, range_PDF, initial_State, instrumental_dist, [0]
)

# %%
# Get a sample
sample_Size = 500
sample_IMH = independent_IMH.getSample(sample_Size)
# %%
# Plot the PDF of the sample to compare it to the target density
# Kernel smoothing with boundary correction over the support [0, 2*pi].
ks_algo = ot.KernelSmoothing()
ks_algo.setBoundaryCorrection(True)
ks_algo.setLowerBound(lower_bound)
ks_algo.setUpperBound(upper_bound)
posterior_IMH = ks_algo.build(sample_IMH)

g_IMH = posterior_IMH.drawPDF(-0.5, upper_bound * 1.1, 1001)
g_IMH.add(draw_exact)
g_IMH.setLegends([f"IMH size = {sample_Size}", "exact PDF"])
g_IMH.setTitle("Independent Metropolis-Hastings (IMH)")
g_IMH.setLegendPosition("topright")
g_IMH.setXTitle("x")
view = otv.View(g_IMH)
# %%
# Even with very few sampling points (500),
# independent Metropolis-Hastings
# (with a judiciously chosen instrumental distribution)
# manages to capture the main features of the target density.
# %%
# Case 3(b): Use of Random walk Metropolis-Hastings
# -------------------------------------------------
# Let us use a normal instrumental distribution.
# Random-walk proposal: a centered Normal step distribution.
instrumental_dist = ot.Normal(0.0, 2.5)
randomwalk_MH = ot.RandomWalkMetropolisHastings(
    log_density, range_PDF, initial_State, instrumental_dist, [0]
)

# %%
# Get a sample
sample_RWMH = randomwalk_MH.getSample(sample_Size)
# %%
# Plot the PDF of the sample to compare it to the target density
# Kernel smoothing with boundary correction over the support [0, 2*pi].
ks_algo = ot.KernelSmoothing()
ks_algo.setBoundaryCorrection(True)
ks_algo.setLowerBound(lower_bound)
ks_algo.setUpperBound(upper_bound)
posterior_RWMH = ks_algo.build(sample_RWMH)

g_RWMH = posterior_RWMH.drawPDF(-0.5, upper_bound * 1.1, 1001)
g_RWMH.add(draw_exact)
g_RWMH.setLegends([f"RWMH size = {sample_Size}", "exact PDF"])
g_RWMH.setTitle("Random walk Metropolis-Hastings (RWMH)")
g_RWMH.setLegendPosition("topright")
g_RWMH.setXTitle("x")
view = otv.View(g_RWMH)
# %%
# Final comparison
# ----------------
# We plot on the same graph the estimated PDF with all the previous algorithms with the same sample size.
# sphinx_gallery_thumbnail_number = 8
# Start from the generic-sampler estimate and stack the others on top.
g = ks_pdf_GS.drawPDF(-0.5, upper_bound * 1.1, 1001)
for estimated_pdf in (posterior_RWMH, posterior_IMH, ks_pdf_RoU):
    g.add(estimated_pdf.drawPDF(-0.5, upper_bound * 1.1, 1001))

# Exact PDF as a black dashed reference curve.
draw_exact.setLineStyle("dashed")
draw_exact.setColor("black")
g.add(draw_exact)

g.setLegends(["GS", "RWMH", "IMH", "RoU", "exact PDF"])
g.setTitle("Comparison of samplers (size = 500)")
g.setXTitle("x")
view = otv.View(g)

# %%
view.ShowAll()
# %%
# References
# -----------
# [1] Marin , J.M. & Robert, C.P. (2007). *Bayesian Core: A Practical Approach to Computational Bayesian Statistics*. Springer-Verlag, New York