File: plot_imh_python_distribution.py

package info (click to toggle)
openturns 1.26-4
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 67,708 kB
  • sloc: cpp: 261,605; python: 67,030; ansic: 4,378; javascript: 406; sh: 185; xml: 164; makefile: 101
file content (387 lines) | stat: -rw-r--r-- 12,536 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
"""
Sampling from an unscaled probability density
=============================================
"""

# %%
# In this example, we show different ways to sample a distribution whose PDF is known up to a normalization constant:
#
# - Case 1: computing the normalization factor and using a :class:`~openturns.PythonDistribution`,
# - Case 2: using the Ratio of Uniforms algorithm with the class :class:`~openturns.experimental.RatioOfUniforms`
# - Case 3: using some Metropolis-Hastings algorithms with the class :class:`~openturns.IndependentMetropolisHastings`
#   and :class:`~openturns.RandomWalkMetropolisHastings`.
#
# Consider a distribution whose
# probability density function :math:`p` is known up to the normalization factor :math:`c`:
# let :math:`f` be a function such that
# :math:`p = cf` with :math:`c \in \Rset^+_*`.
#
# We illustrate the case with:
#
# .. math::
#
#     f(x) = \frac{1}{2} (2 + \sin(x)^2) \exp \left[- \left(2 + \cos(3x)^3 + \sin(2x)^3 \right) x
#     \right]  \mathbf{1}_{[0, 2 \pi]}(x).
#
# First, we draw the unscaled probability density function.

import openturns as ot
import openturns.experimental as otexp
import openturns.viewer as otv
from math import pi
from time import monotonic

# Fix the seed so the sampling-based figures are reproducible.
ot.RandomGenerator.SetSeed(1)
# Unscaled density f: known only up to the normalization constant c.
f = ot.SymbolicFunction(
    "x", "0.5 * (2 + sin(x)^2) * exp( -( 2 + cos(3*x)^3 + sin(2*x)^3) * x )"
)
# Support of the target distribution: [0, 2*pi].
lower_bound = 0.0
upper_bound = 2.0 * pi
range_PDF = ot.Interval(lower_bound, upper_bound)
# Draw the unscaled density on its support (512 evaluation points).
graph = f.draw(lower_bound, upper_bound, 512)
graph.setTitle(
    r"Christian Robert function: $f(x) =  0.5(2 + sin^2 x) e^{ -x( 2 + cos^33x + sin^3 2x)}, \quad x \in [0, 2\pi]$"
)
graph.setXTitle("x")
graph.setYTitle(r"$f(x)$")
view = otv.View(graph)


# %%
# Case 1: Computation of the normalization factor
# -----------------------------------------------
# The best thing to do is to compute the normalization factor thanks to the integration algorithms of the library.
#
# We show how to compute the normalization factor using a :class:`~openturns.GaussKronrod` quadrature formula.
# Integrate f over its support with adaptive Gauss-Kronrod quadrature;
# the normalization factor is the reciprocal of that integral.
integral_f = ot.GaussKronrod().integrate(f, range_PDF)[0]
norm_fact = 1.0 / integral_f
print("normalization factor = ", norm_fact)

# %%
# Thus, we can define the exact PDF expression:
#
# .. math::
#
#   p(x) = \dfrac{f(x)}{\int_0^{2\pi} f(u)\, du}
#
# The exact PDF is simply f rescaled by the normalization factor.
exact_PDF = ot.LinearCombinationFunction([f], [norm_fact])

# %%
# Then we define a :class:`~openturns.PythonDistribution` which:
#
# - *computePDF* is computed from the exact expression,
# - *computeCDF* is computed using an integration algorithm on the *computePDF*.
#
# Doing that way, we use the generic sampler of the distribution, based on the CDF inversion method,
# implemented in the *getSample* method.


class NewDistribution(ot.PythonDistribution):
    """Scalar distribution defined by the exact normalized PDF *exact_PDF*.

    Only the PDF, log-PDF and CDF are implemented: sampling therefore
    falls back on the library's generic CDF-inversion sampler.
    """

    def __init__(self):
        super().__init__(1)
        self.density_ = exact_PDF
        # log p(x), built as the composition log o p.
        self.log_density_ = ot.ComposedFunction(
            ot.SymbolicFunction("x", "log(x)"), self.density_
        )
        # Support is [0, 2*pi).
        self.lower_ = 0.0
        self.upper_ = 2.0 * pi

    def _inside(self, x):
        """Return True when x belongs to the support [lower_, upper_)."""
        return not (x < self.lower_ or x >= self.upper_)

    def getRange(self):
        """Numerical range of the distribution."""
        return ot.Interval(self.lower_, self.upper_)

    def computeLogPDF(self, X):
        """Log-density at X; minus infinity outside the support."""
        if self._inside(X[0]):
            return self.log_density_(X)[0]
        return -ot.SpecFunc.Infinity

    def computePDF(self, X):
        """Density at X; zero outside the support."""
        if self._inside(X[0]):
            return self.density_(X)[0]
        return 0.0

    def computeCDF(self, X):
        """CDF at X, obtained by a 64-node Gauss-Legendre quadrature of the PDF."""
        x = X[0]
        if x < self.lower_:
            return 0.0
        if x >= self.upper_:
            return 1.0
        return ot.GaussLegendre([64]).integrate(
            self.density_, ot.Interval(self.lower_, x)
        )[0]


# %%
# We create the new distribution that uses the generic sampler:
newDist_generic = ot.Distribution(NewDistribution())

# %%
# We can draw the exact PDF and the PDF estimated from a sample generated by
# the generic (CDF-inversion) sampler.

# %%
size = 500
sample = newDist_generic.getSample(size)

# Kernel smoothing with boundary correction on the known support [0, 2*pi].
ks_algo = ot.KernelSmoothing()
ks_algo.setBoundaryCorrection(True)
ks_algo.setLowerBound(lower_bound)
ks_algo.setUpperBound(upper_bound)
ks_pdf_GS = ks_algo.build(sample)

# Draw the estimate first, then overlay the exact PDF (legend order matters).
g = ks_pdf_GS.drawPDF(-0.5, upper_bound * 1.1, 1001)
draw_exact = exact_PDF.draw(lower_bound, upper_bound, 1001).getDrawable(0)
draw_exact.setLineWidth(2)
g.add(draw_exact)
g.setLegends(["GS size = " + str(size), "exact pdf"])
g.setLegendPosition("topright")
g.setTitle("Generic sampler (GS)")
g.setXTitle("x")
view = otv.View(g)

# %%
# We can change the sampler of that new distribution by implementing the method *getSample* and *getRealization* as follows:


class NewDistribution_RoU(ot.PythonDistribution):
    """Same distribution as *NewDistribution*, but with a dedicated sampler.

    *getRealization* and *getSample* delegate to a Ratio of Uniforms
    sampler instead of the generic CDF-inversion method.
    """

    def __init__(self):
        super().__init__(1)
        self.density_ = exact_PDF
        # log p(x), built as the composition log o p.
        self.log_density_ = ot.ComposedFunction(
            ot.SymbolicFunction("x", "log(x)"), self.density_
        )
        # Support is [0, 2*pi).
        self.lower_ = 0.0
        self.upper_ = 2.0 * pi
        # Sampler used by getRealization/getSample below.
        self.sampler_ = otexp.RatioOfUniforms(self.log_density_, self.getRange())

    def _inside(self, x):
        """Return True when x belongs to the support [lower_, upper_)."""
        return not (x < self.lower_ or x >= self.upper_)

    def getRange(self):
        """Numerical range of the distribution."""
        return ot.Interval(self.lower_, self.upper_)

    def computeLogPDF(self, X):
        """Log-density at X; minus infinity outside the support."""
        if self._inside(X[0]):
            return self.log_density_(X)[0]
        return -ot.SpecFunc.Infinity

    def computePDF(self, X):
        """Density at X; zero outside the support."""
        if self._inside(X[0]):
            return self.density_(X)[0]
        return 0.0

    def computeCDF(self, X):
        """CDF at X, obtained by a 32-node Gauss-Legendre quadrature of the PDF."""
        x = X[0]
        if x < self.lower_:
            return 0.0
        if x >= self.upper_:
            return 1.0
        return ot.GaussLegendre([32]).integrate(
            self.density_, ot.Interval(self.lower_, x)
        )[0]

    def getRealization(self):
        """Draw one point with the Ratio of Uniforms sampler."""
        return self.sampler_.getRealization()

    def getSample(self, n):
        """Draw n points with the Ratio of Uniforms sampler."""
        return self.sampler_.getSample(n)


# %%
# We create the new distribution that uses the Ratio of Uniforms sampler:
newDist_RoU = ot.Distribution(NewDistribution_RoU())

# %%
# We compare the sampling speed of the distribution with the Ratio of Uniforms algorithm to the generic sampling speed.
# The Ratio of Uniforms algorithm proves to be much quicker than the generic method.
sizeRoU = 10000
sizeGeneric = 100

# Time both samplers back to back with a monotonic clock.
tic = monotonic()
sample_RoU = newDist_RoU.getSample(sizeRoU)
tac = monotonic()
sample_generic = newDist_generic.getSample(sizeGeneric)
toc = monotonic()

# Throughput (points per second) of each sampler.
speedRoU = sizeRoU / (tac - tic)
speedGeneric = sizeGeneric / (toc - tac)
print(f"Is Ratio of Uniforms faster ? {speedRoU > 10 * speedGeneric}")

# %%
# Case 2: Direct use of the Ratio of Uniforms algorithm
# -----------------------------------------------------
# In that case, we want to use the :class:`~openturns.experimental.RatioOfUniforms` algorithm to
# sample :math:`p`. We need to compute the function :math:`\log f` and its range.
#
# We create the function :math:`\log f` and the :class:`~openturns.experimental.RatioOfUniforms`:
log_UnscaledPDF = ot.ComposedFunction(ot.SymbolicFunction("x", "log(x)"), f)
ratio_algo = otexp.RatioOfUniforms(log_UnscaledPDF, range_PDF)

# %%
# We can draw the exact PDF and the PDF estimated from a sample generated by the Ratio of Uniforms algorithm.

# %%
size = 100000
sample = ratio_algo.getSample(size)

# Kernel smoothing with boundary correction on the known support.
ks_algo = ot.KernelSmoothing()
ks_algo.setBoundaryCorrection(True)
ks_algo.setLowerBound(lower_bound)
ks_algo.setUpperBound(upper_bound)
ks_pdf = ks_algo.build(sample)

g = ks_pdf.drawPDF(-0.5, upper_bound * 1.1, 1001)
g.add(draw_exact)
g.setLegends(["RoU size = " + str(size), "exact PDF"])
g.setLegendPosition("topright")
g.setTitle("Ratio of Uniforms (RoU) ")
g.setXTitle("x")
view = otv.View(g)

# %%
# In order to facilitate the comparison with the use of Metropolis-Hastings
# based algorithms, we generate
# a sample of the same size and we draw the estimated PDF.
size2 = 500
sample = ratio_algo.getSample(size2)

ks_algo = ot.KernelSmoothing()
ks_algo.setBoundaryCorrection(True)
ks_algo.setLowerBound(lower_bound)
ks_algo.setUpperBound(upper_bound)
ks_pdf_RoU = ks_algo.build(sample)

# Overlay the small-sample estimate on the previous graph (third legend entry).
draw_RoU = ks_pdf_RoU.drawPDF(-0.5, upper_bound * 1.1, 1001).getDrawable(0)
draw_RoU.setLineWidth(2)
g.add(draw_RoU)
g.setLegends(
    ["RoU size = " + str(size), "exact PDF", "RoU (size = " + str(size2) + ")"]
)
g.setTitle("Ratio of Uniforms")
view = otv.View(g)

# %%
# By default, the parameter :math:`r=1`. We can get the associated acceptance ratio:
print("Acceptance ratio = ", ratio_algo.getAcceptanceRatio())

# %%
# We can estimate the normalization factor with the  Ratio of Uniforms algorithm (see
# the documentation of :class:`~openturns.experimental.RatioOfUniforms`):
print("Normalization factor estimate = ", ratio_algo.getC())

# %%
# We can change the :math:`r` parameter and check the associated acceptance ratio:
r_new = 0.5
ratio_algo.setR(r_new)
print("New acceptance ratio = ", ratio_algo.getAcceptanceRatio())

# %%
# Case 3(a): Use of Independent Metropolis-Hastings
# -------------------------------------------------
# Let us use a mixture distribution to approximate the target distribution.
#
# This approximation will serve as the instrumental distribution
# in the independent Metropolis-Hastings algorithm.

exp = ot.Exponential(1.0)
unif = ot.Normal(5.3, 0.4)
instrumental_dist = ot.Mixture([exp, unif], [0.9, 0.1])

# %%
# Compare the instrumental density to the exact PDF.
graph = instrumental_dist.drawPDF(lower_bound, upper_bound, 512)
graph.add(draw_exact)
graph.setLegends(["Instrumental PDF", "exact PDF"])
graph.setLegendPosition("upper right")
graph.setTitle("Independent Metropolis-Hastings: Instrumental PDF")
graph.setXTitle("x")
_ = otv.View(graph)

# %%
# :class:`~openturns.MetropolisHastings` and derived classes can work directly with the logarithm of the unscaled
# target density.

log_density = ot.ComposedFunction(ot.SymbolicFunction("x", "log(x)"), f)

initial_State = ot.Point([3.0])  # not important in this case
independent_IMH = ot.IndependentMetropolisHastings(
    log_density, range_PDF, initial_State, instrumental_dist, [0]
)

# %%
# Get a sample
sample_Size = 500
sample_IMH = independent_IMH.getSample(sample_Size)


# %%
# Plot the PDF of the sample to compare it to the target density
# (kernel smoothing with boundary correction on the known support).
ks_algo = ot.KernelSmoothing()
ks_algo.setBoundaryCorrection(True)
ks_algo.setLowerBound(0.0)
ks_algo.setUpperBound(2.0 * pi)
posterior_IMH = ks_algo.build(sample_IMH)

g_IMH = posterior_IMH.drawPDF(-0.5, upper_bound * 1.1, 1001)
g_IMH.add(draw_exact)
g_IMH.setLegends(["IMH size = {}".format(sample_Size), "exact PDF"])
g_IMH.setTitle("Independent Metropolis-Hastings (IMH)")
g_IMH.setXTitle("x")
g_IMH.setLegendPosition("topright")
view = otv.View(g_IMH)

# %%
# Even with very few sampling points (500),
# independent Metropolis-Hastings
# (with a judiciously chosen instrumental distribution)
# manages to capture the main features of the target density.

# %%
# Case 3(b): Use of Random walk Metropolis-Hastings
# -------------------------------------------------
# Let us use a normal instrumental distribution.

instrumental_dist = ot.Normal(0.0, 2.5)
randomwalk_MH = ot.RandomWalkMetropolisHastings(
    log_density, range_PDF, initial_State, instrumental_dist, [0]
)

# %%
# Get a sample
sample_RWMH = randomwalk_MH.getSample(sample_Size)

# %%
# Plot the PDF of the sample to compare it to the target density
# (kernel smoothing with boundary correction on the known support).

ks_algo = ot.KernelSmoothing()
ks_algo.setBoundaryCorrection(True)
ks_algo.setLowerBound(0.0)
ks_algo.setUpperBound(2.0 * pi)
posterior_RWMH = ks_algo.build(sample_RWMH)

g_RWMH = posterior_RWMH.drawPDF(-0.5, upper_bound * 1.1, 1001)
g_RWMH.add(draw_exact)
g_RWMH.setLegends(["RWMH size = {}".format(sample_Size), "exact PDF"])
g_RWMH.setTitle("Random walk Metropolis-Hastings (RWMH)")
g_RWMH.setXTitle("x")
g_RWMH.setLegendPosition("topright")
view = otv.View(g_RWMH)

# %%
# Final comparison
# ----------------
# We plot on the same graph the estimated PDF with all the previous algorithms with the same sample size.
# sphinx_gallery_thumbnail_number = 8
g = ks_pdf_GS.drawPDF(-0.5, upper_bound * 1.1, 1001)
g.add(posterior_RWMH.drawPDF(-0.5, upper_bound * 1.1, 1001))
g.add(posterior_IMH.drawPDF(-0.5, upper_bound * 1.1, 1001))
g.add(ks_pdf_RoU.drawPDF(-0.5, upper_bound * 1.1, 1001))
# Restyle the exact PDF drawable so it stands out among the four estimates.
draw_exact.setLineStyle("dashed")
draw_exact.setColor("black")
g.add(draw_exact)
g.setLegends(["GS", "RWMH", "IMH", "RoU", "exact PDF"])
g.setTitle("Comparison of samplers (size = 500)")
g.setXTitle("x")
view = otv.View(g)


# %%
view.ShowAll()

# %%
# References
# -----------
# [1] Marin, J.M. & Robert, C.P. (2007). *Bayesian Core: A Practical Approach to Computational Bayesian Statistics*. Springer-Verlag, New York