File: binomial.py

package info (click to toggle)
python-bitarray 3.6.1-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 1,288 kB
  • sloc: python: 11,456; ansic: 7,657; makefile: 73; sh: 6
file content (134 lines) | stat: -rw-r--r-- 4,365 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
import sys
from math import sqrt

from scipy.special import betainc


class BinomialDist:

    # This class describes the binomial distribution with parameters n and p.
    # That is, the (discrete) probability distribution of the number of
    # successes in a sequence of n independent Bernoulli trails, with each
    # trail having a probability p of success.

    def __init__(self, n, p):
        assert n > 0
        assert 0.0 <= p <= 1.0
        self.n = n
        self.p = p
        self.q = 1.0 - p
        self.mu = n * p
        self.sigma = sqrt(n * p * self.q)

    def print(self):
        print("n = %d    p = %f" % (self.n, self.p))
        print("mu = %f" % self.mu)
        print("sigma = %f" % self.sigma)

    def pmf(self, k):
        assert 0 <= k <= self.n, k
        # The reason we use .cdf() to calculate the PMF is because
        # comb(n, k) * p ** k  *  (1.0 - p) ** (n - k)  will fail for large
        # n, whereas .cdf() uses the regularized incomplete beta function.
        return self.cdf(k) - self.cdf(k - 1)

    def cdf(self, k):
        return betainc(self.n - k, k + 1, self.q)

    def range_k(self, k1, k2):
        "probability for k being in k1 <= k <= k2"
        assert 0 <= k1 <= k2 <= self.n
        return self.cdf(k2) - self.cdf(k1 - 1)

# ---------------------------------------------------------------------------

import unittest
from math import comb

class BinomialDistTests(unittest.TestCase):

    def test_pmf_simple(self):
        b = BinomialDist(1, 0.7)
        self.assertAlmostEqual(b.pmf(0), 0.3)
        self.assertAlmostEqual(b.pmf(1), 0.7)

        b = BinomialDist(2, 0.5)
        self.assertAlmostEqual(b.pmf(0), 0.25)
        self.assertAlmostEqual(b.pmf(1), 0.50)
        self.assertAlmostEqual(b.pmf(2), 0.25)

    def test_pmf_sum(self):
        for n in 10, 100, 1_000, 10_000:
            b = BinomialDist(n, 0.5)
            tot = 0
            for k in range(n + 1):
                tot += b.pmf(k)
            self.assertAlmostEqual(tot, 1.0)

    def test_pmf(self):
        for n in 10, 50, 100, 250:
            for p in 0.1, 0.2, 0.5, 0.7:
                b = BinomialDist(n, p)
                for k in range(n + 1):
                    res = comb(n, k) * p ** k * (1.0 - p) ** (n - k)
                    self.assertAlmostEqual(b.pmf(k), res, delta=1e-14)

    def test_cdf(self):
        for n in 5, 50, 500:
            b = BinomialDist(n, 0.3)
            self.assertAlmostEqual(b.cdf(-1), 0.0)
            self.assertAlmostEqual(b.cdf(n), 1.0)
            sm = 0.0
            for k in range(n + 1):
                sm += b.pmf(k)
                self.assertAlmostEqual(b.cdf(k), sm)

    def test_range_k(self):
        n = 10_000
        for p in 0.1, 0.2, 0.5, 0.7:
            b = BinomialDist(n, p)
            self.assertAlmostEqual(b.range_k(0, n), 1.0)
            for k in range(n + 1):
                self.assertAlmostEqual(b.range_k(k, k), b.pmf(k))

    def test_range_half(self):
        n = 1_000_001
        b = BinomialDist(n, 0.5)
        self.assertAlmostEqual(b.range_k(0, 500_000), 0.5)
        self.assertAlmostEqual(b.range_k(500_001, n), 0.5)


if __name__ == '__main__':
    if len(sys.argv) == 1:
        unittest.main()

    # This code was used to create some of the tests for util.random_p()
    # in ../test_random.py
    #
    # python binomial.py 250_000 5 0.3
    # python binomial.py 100_000 100 .375 37..48
    # python binomial.py 25_000 100_000 .5 48_000..50_000 50_000..50_200

    m, n = [int(i) for i in sys.argv[1:3]]
    p = float(sys.argv[3])
    bd = BinomialDist(n, p)
    bd.print()
    if n <= 100_000:
        p_tot = 0.0
        for k in range(n + 1):
            p = bd.pmf(k)
            p_tot += p
            if p < 0.01:
                continue
            bb = BinomialDist(m, p)
            fmt = "self.assertTrue(abs(C[{:d}] - {:6_d}) <= {:6_d})  # p = {:f}"
            print(fmt.format(k, round(bb.mu), round(10 * bb.sigma), p))
        assert abs(p_tot - 1.0) < 1e-15, p_tot

    for s in sys.argv[4:]:
        k1, k2 = [int(t) for t in s.split("..")]
        p = bd.range_k(k1, k2)
        bb = BinomialDist(m, p)
        print("x = sum(C[k] for k in %s)" % range(k1, k2 + 1))
        fmt = "self.assertTrue(abs(x - {:6_d}) <= {:6_d})  # p = {:f}"
        print(fmt.format(round(bb.mu), round(10 * bb.sigma), p))