"""
Sampling from an unscaled probability density
=============================================
"""
# %%
# In this example, we show different ways to sample a distribution whose PDF is known up to a normalization constant:
#
# - Case 1: computing the normalization factor and using a :class:`~openturns.PythonDistribution`,
# - Case 2: using the Ratio of Uniforms algorithm with the class :class:`~openturns.experimental.RatioOfUniforms`
# - Case 3: using some Metropolis-Hastings algorithms with the class :class:`~openturns.IndependentMetropolisHastings`
# and :class:`~openturns.RandomWalkMetropolisHastings`.
#
# Consider a distribution whose
# probability density function :math:`p` is known up to the normalization factor :math:`c`:
# let :math:`f` be a function such that
# :math:`p = cf` with :math:`c \in \Rset^+_*`.
#
# We illustrate the case with:
#
# .. math::
#
#    f(x) = \frac{1}{2} (2 + \sin(x)^2) \exp \left[- \left(2 + \cos(3x)^3 + \sin(2x)^3 \right) x
#    \right] \mathbf{1}_{[0, 2 \pi]}(x).
#
# First, we draw the unscaled probability density function.
import openturns as ot
import openturns.experimental as otexp
import openturns.viewer as otv
from math import pi
from time import monotonic
# Fix the seed so the example is reproducible.
ot.RandomGenerator.SetSeed(1)

# Support of the target density: [0, 2*pi].
lower_bound = 0.0
upper_bound = 2.0 * pi
range_PDF = ot.Interval(lower_bound, upper_bound)

# Unscaled density f (known up to the normalization constant c).
f = ot.SymbolicFunction(
    "x", "0.5 * (2 + sin(x)^2) * exp( -( 2 + cos(3*x)^3 + sin(2*x)^3) * x )"
)

# Draw f over its support.
graph = f.draw(lower_bound, upper_bound, 512)
graph.setTitle(
    r"Christian Robert function: $f(x) = 0.5(2 + sin^2 x) e^{ -x( 2 + cos^33x + sin^3 2x)}, \quad x \in [0, 2\pi]$"
)
graph.setYTitle(r"$f(x)$")
graph.setXTitle("x")
view = otv.View(graph)
# %%
# Case 1: Computation of the normalization factor
# -----------------------------------------------
# The best thing to do is to compute the normalization factor thanks to the integration algorithms of the library.
#
# We show how to compute the normalization factor using a :class:`~openturns.GaussKronrod` quadrature formula.
# Integral of f over its support, computed with a Gauss-Kronrod rule: this is 1/c.
integral_of_f = ot.GaussKronrod().integrate(f, range_PDF)[0]
norm_fact = 1.0 / integral_of_f
print("normalization factor = ", norm_fact)

# %%
# Thus, we can define the exact PDF expression:
#
# .. math::
#
#    p(x) = \dfrac{f(x)}{\int_0^{2\pi} f(u)\, du}
#
exact_PDF = ot.LinearCombinationFunction([f], [norm_fact])
# %%
# Then we define a :class:`~openturns.PythonDistribution` which:
#
# - *computePDF* is computed from the exact expression,
# - *computeCDF* is computed using an integration algorithm on the *computePDF*.
#
# Doing that way, we use the generic sampler of the distribution, based on the CDF inversion method,
# implemented in the *getSample* method.
class NewDistribution(ot.PythonDistribution):
    """Scalar distribution with exact PDF p = c*f on [0, 2*pi].

    Sampling relies on the generic CDF-inversion sampler of the library,
    since neither getRealization nor getSample is overridden here.
    """

    def __init__(self):
        super().__init__(1)
        # Normalized PDF and its logarithm, both as OpenTURNS functions.
        self.pdf_ = exact_PDF
        self.log_pdf_ = ot.ComposedFunction(
            ot.SymbolicFunction("x", "log(x)"), self.pdf_
        )
        # Bounds of the support.
        self.lower_ = 0.0
        self.upper_ = 2.0 * pi

    def getRange(self):
        # Support of the distribution: [0, 2*pi].
        return ot.Interval(self.lower_, self.upper_)

    def computeLogPDF(self, X):
        # Outside the support the log-PDF is -infinity.
        if X[0] < self.lower_ or self.upper_ <= X[0]:
            return -ot.SpecFunc.Infinity
        return self.log_pdf_(X)[0]

    def computePDF(self, X):
        # Outside the support the PDF is zero.
        if X[0] < self.lower_ or self.upper_ <= X[0]:
            return 0.0
        return self.pdf_(X)[0]

    def computeCDF(self, X):
        if X[0] < self.lower_:
            return 0.0
        if X[0] >= self.upper_:
            return 1.0
        # CDF obtained by Gauss-Legendre integration of the PDF up to X[0].
        return ot.GaussLegendre([64]).integrate(
            self.pdf_, ot.Interval(self.lower_, X[0])
        )[0]
# %%
# We create the new distribution that uses the generic sampler:
newDist_generic = ot.Distribution(NewDistribution())

# %%
# We can draw the exact PDF and the PDF estimated from a sample generated by
# the generic (CDF-inversion) sampler.
size = 500
sample = newDist_generic.getSample(size)

# Kernel smoothing with boundary correction over the support.
ks_algo = ot.KernelSmoothing()
ks_algo.setBoundaryCorrection(True)
ks_algo.setLowerBound(lower_bound)
ks_algo.setUpperBound(upper_bound)
ks_pdf_GS = ks_algo.build(sample)

g = ks_pdf_GS.drawPDF(-0.5, upper_bound * 1.1, 1001)
# Thicker curve for the exact PDF; this drawable is reused in later graphs.
draw_exact = exact_PDF.draw(lower_bound, upper_bound, 1001).getDrawable(0)
draw_exact.setLineWidth(2)
g.add(draw_exact)
g.setLegends([f"GS size = {size}", "exact pdf"])
g.setLegendPosition("topright")
g.setTitle("Generic sampler (GS)")
g.setXTitle("x")
view = otv.View(g)
# %%
# We can change the sampler of that new distribution by implementing the method *getSample* and *getRealization* as follows:
class NewDistribution_RoU(ot.PythonDistribution):
    """Same distribution as NewDistribution, but sampled with Ratio of Uniforms.

    getRealization and getSample delegate to a RatioOfUniforms sampler built
    from the log-PDF, which is much faster than the generic CDF inversion.
    """

    def __init__(self):
        super().__init__(1)
        # Normalized PDF and its logarithm.
        self.pdf_ = exact_PDF
        self.log_pdf_ = ot.ComposedFunction(
            ot.SymbolicFunction("x", "log(x)"), self.pdf_
        )
        # Bounds of the support [0, 2*pi].
        self.lower_ = 0.0
        self.upper_ = 2.0 * pi
        # Dedicated sampler used by getRealization / getSample.
        self.sampler_ = otexp.RatioOfUniforms(self.log_pdf_, self.getRange())

    def getRange(self):
        return ot.Interval(self.lower_, self.upper_)

    def computeLogPDF(self, X):
        # Outside the support the log-PDF is -infinity.
        if X[0] < self.lower_ or self.upper_ <= X[0]:
            return -ot.SpecFunc.Infinity
        return self.log_pdf_(X)[0]

    def computePDF(self, X):
        # Outside the support the PDF is zero.
        if X[0] < self.lower_ or self.upper_ <= X[0]:
            return 0.0
        return self.pdf_(X)[0]

    def computeCDF(self, X):
        if X[0] < self.lower_:
            return 0.0
        if X[0] >= self.upper_:
            return 1.0
        # CDF obtained by Gauss-Legendre integration of the PDF up to X[0].
        return ot.GaussLegendre([32]).integrate(
            self.pdf_, ot.Interval(self.lower_, X[0])
        )[0]

    def getRealization(self):
        # Delegate to the Ratio of Uniforms sampler.
        return self.sampler_.getRealization()

    def getSample(self, n):
        # Delegate to the Ratio of Uniforms sampler.
        return self.sampler_.getSample(n)
# %%
# We create the new distribution that uses the Ratio of Uniforms sampler:
newDist_RoU = ot.Distribution(NewDistribution_RoU())

# %%
# We compare the sampling speed of the distribution with the Ratio of Uniforms algorithm to the generic sampling speed.
# The Ratio of Uniforms algorithm proves to be much quicker than the generic method.
sizeRoU = 10000
sizeGeneric = 100

# Time both samplers with a monotonic clock (immune to system clock changes).
tic = monotonic()
sample_RoU = newDist_RoU.getSample(sizeRoU)
tac = monotonic()
sample_generic = newDist_generic.getSample(sizeGeneric)
toc = monotonic()

# Throughput in realizations per second for each sampler.
speed_rou = sizeRoU / (tac - tic)
speed_generic = sizeGeneric / (toc - tac)
print(f"Is Ratio of Uniforms faster ? {speed_rou > 10 * speed_generic}")
# %%
# Case 2: Direct use of the Ratio of Uniforms algorithm
# -----------------------------------------------------
# In that case, we want to use the :class:`~openturns.experimental.RatioOfUniforms` algorithm to
# sample :math:`p`. We need to compute the function :math:`\log f` and its range.
#
# We create the function :math:`\log f` and the :class:`~openturns.experimental.RatioOfUniforms`:
# log(f): the Ratio of Uniforms algorithm works on the log of the unscaled PDF.
log_unscaled_pdf = ot.ComposedFunction(ot.SymbolicFunction("x", "log(x)"), f)
ratio_algo = otexp.RatioOfUniforms(log_unscaled_pdf, range_PDF)

# %%
# We can draw the exact PDF and the PDF estimated from a sample generated by the Ratio of Uniforms algorithm.
size = 100000
sample = ratio_algo.getSample(size)

# Kernel-smoothed estimate of the sampled density, with boundary correction.
ks_algo = ot.KernelSmoothing()
ks_algo.setBoundaryCorrection(True)
ks_algo.setLowerBound(lower_bound)
ks_algo.setUpperBound(upper_bound)
ks_pdf = ks_algo.build(sample)

g = ks_pdf.drawPDF(-0.5, upper_bound * 1.1, 1001)
g.add(draw_exact)
g.setLegends([f"RoU size = {size}", "exact PDF"])
g.setLegendPosition("topright")
g.setTitle("Ratio of Uniforms (RoU) ")
g.setXTitle("x")
view = otv.View(g)
# %%
# In order to facilitate the comparison with the use of Metropolis-Hastings
# based algorithms, we generate
# a sample of the same size and we draw the estimated PDF.
# Smaller sample, same size as the Metropolis-Hastings runs below.
size2 = 500
sample = ratio_algo.getSample(size2)
ks_algo = ot.KernelSmoothing()
ks_algo.setBoundaryCorrection(True)
ks_algo.setLowerBound(lower_bound)
ks_algo.setUpperBound(upper_bound)
ks_pdf_RoU = ks_algo.build(sample)

# Overlay the small-sample estimate (thicker line) on the previous graph.
draw_RoU = ks_pdf_RoU.drawPDF(-0.5, upper_bound * 1.1, 1001).getDrawable(0)
draw_RoU.setLineWidth(2)
g.add(draw_RoU)
g.setLegends([f"RoU size = {size}", "exact PDF", f"RoU (size = {size2})"])
g.setTitle("Ratio of Uniforms")
view = otv.View(g)
# %%
# By default, the parameter :math:`r=1`. We can get the associated acceptance ratio:
print("Acceptance ratio = ", ratio_algo.getAcceptanceRatio())
# %%
# We can estimate the normalization factor with the Ratio of Uniforms algorithm (see
# the documenttaion of :class:`~openturns.experimental.RatioOfUniforms`):
print("Normalization factor estimate = ", ratio_algo.getC())
# %%
# We can change the :math:`r` parameter and check the associated acceptance ratio:
r_new = 0.5
ratio_algo.setR(r_new)
print("New acceptance ratio = ", ratio_algo.getAcceptanceRatio())
# %%
# Case 3(a): Use of Independent Metropolis-Hastings
# -------------------------------------------------
# Let us use a mixture distribution to approximate the target distribution.
#
# This approximation will serve as the instrumental distribution
# in the independent Metropolis-Hastings algorithm.
# Instrumental distribution for independent Metropolis-Hastings: a mixture of
# an Exponential bulk near 0 and a Normal bump around 5.3 that mimics the
# target's shape. (The second component was misleadingly named "unif" before:
# it is a Normal, not a Uniform.)
expo_component = ot.Exponential(1.0)
normal_component = ot.Normal(5.3, 0.4)
instrumental_dist = ot.Mixture([expo_component, normal_component], [0.9, 0.1])
# %%
# Compare the instrumental density to the exact PDF.
graph = instrumental_dist.drawPDF(lower_bound, upper_bound, 512)
graph.add(draw_exact)
graph.setLegends(["Instrumental PDF", "exact PDF"])
# Use "topright" like every other graph in this example (was "upper right").
graph.setLegendPosition("topright")
graph.setTitle("Independent Metropolis-Hastings: Instrumental PDF")
graph.setXTitle("x")
view = otv.View(graph)
# %%
# :class:`~openturns.MetropolisHastings` and derived classes can work directly with the logarithm of the unscaled
# target density.
# Log of the unscaled target density: Metropolis-Hastings only needs the
# target up to a constant, so log(f) is enough.
log_fun = ot.SymbolicFunction("x", "log(x)")
log_density = ot.ComposedFunction(log_fun, f)
initial_State = ot.Point([3.0])  # starting point; irrelevant for an independent sampler
independent_IMH = ot.IndependentMetropolisHastings(
    log_density, range_PDF, initial_State, instrumental_dist, [0]
)

# %%
# Get a sample
sample_Size = 500
sample_IMH = independent_IMH.getSample(sample_Size)
# %%
# Plot the PDF of the sample to compare it to the target density
# Kernel smoothing with boundary correction over the support [0, 2*pi].
ks_algo = ot.KernelSmoothing()
ks_algo.setBoundaryCorrection(True)
ks_algo.setLowerBound(lower_bound)
ks_algo.setUpperBound(upper_bound)
posterior_IMH = ks_algo.build(sample_IMH)

g_IMH = posterior_IMH.drawPDF(-0.5, upper_bound * 1.1, 1001)
g_IMH.add(draw_exact)
g_IMH.setLegends([f"IMH size = {sample_Size}", "exact PDF"])
g_IMH.setTitle("Independent Metropolis-Hastings (IMH)")
g_IMH.setLegendPosition("topright")
g_IMH.setXTitle("x")
view = otv.View(g_IMH)
# %%
# Even with very few sampling points (500),
# independent Metropolis-Hastings
# (with a judiciously chosen instrumental distribution)
# manages to capture the main features of the target density.
# %%
# Case 3(b): Use of Random walk Metropolis-Hastings
# -------------------------------------------------
# Let us use a normal instrumental distribution.
# Random-walk proposal: a centered Normal step distribution.
instrumental_dist = ot.Normal(0.0, 2.5)
randomwalk_MH = ot.RandomWalkMetropolisHastings(
    log_density, range_PDF, initial_State, instrumental_dist, [0]
)

# %%
# Get a sample
sample_RWMH = randomwalk_MH.getSample(sample_Size)
# %%
# Plot the PDF of the sample to compare it to the target density
# Kernel smoothing with boundary correction over the support [0, 2*pi].
ks_algo = ot.KernelSmoothing()
ks_algo.setBoundaryCorrection(True)
ks_algo.setLowerBound(lower_bound)
ks_algo.setUpperBound(upper_bound)
posterior_RWMH = ks_algo.build(sample_RWMH)

g_RWMH = posterior_RWMH.drawPDF(-0.5, upper_bound * 1.1, 1001)
g_RWMH.add(draw_exact)
g_RWMH.setLegends([f"RWMH size = {sample_Size}", "exact PDF"])
g_RWMH.setTitle("Random walk Metropolis-Hastings (RWMH)")
g_RWMH.setLegendPosition("topright")
g_RWMH.setXTitle("x")
view = otv.View(g_RWMH)
# %%
# Final comparison
# ----------------
# We plot on the same graph the estimated PDF with all the previous algorithms with the same sample size.
# sphinx_gallery_thumbnail_number = 8
# Start from the generic-sampler estimate and stack the others on top.
g = ks_pdf_GS.drawPDF(-0.5, upper_bound * 1.1, 1001)
for estimated_pdf in (posterior_RWMH, posterior_IMH, ks_pdf_RoU):
    g.add(estimated_pdf.drawPDF(-0.5, upper_bound * 1.1, 1001))

# Exact PDF as a black dashed reference curve.
draw_exact.setLineStyle("dashed")
draw_exact.setColor("black")
g.add(draw_exact)

g.setLegends(["GS", "RWMH", "IMH", "RoU", "exact PDF"])
g.setTitle("Comparison of samplers (size = 500)")
g.setXTitle("x")
view = otv.View(g)

# %%
view.ShowAll()
# %%
# References
# -----------
# [1] Marin , J.M. & Robert, C.P. (2007). *Bayesian Core: A Practical Approach to Computational Bayesian Statistics*. Springer-Verlag, New York