File: chi2.py

package info (click to toggle)
python-biopython 1.78%2Bdfsg-4
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 65,756 kB
  • sloc: python: 221,141; xml: 178,777; ansic: 13,369; sql: 1,208; makefile: 131; sh: 70
file content (136 lines) | stat: -rw-r--r-- 3,727 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
# Copyright (C) 2011 by Brandon Invergo (b.invergo@gmail.com)
#
# This file is part of the Biopython distribution and governed by your
# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
# Please see the LICENSE file that should have been included as part of this
# package.
#
# This code is adapted (with permission) from the C source code of chi2.c,
# written by Ziheng Yang and included in the PAML software package:
# http://abacus.gene.ucl.ac.uk/software/paml.html

"""Methods to calculate p-values from a Chi-squared cumulative distribution function.

for likelihood ratio tests.
"""

from math import log, exp


def cdf_chi2(df, stat):
    """Compute p-value, from distribution function and test statistics."""
    if df < 1:
        raise ValueError("df must be at least 1")
    if stat < 0:
        raise ValueError("The test statistic must be positive")
    x = 0.5 * stat
    alpha = df / 2.0
    prob = 1 - _incomplete_gamma(x, alpha)
    return prob


def _ln_gamma_function(alpha):
    """Compute the log of the gamma function for a given alpha (PRIVATE).

    Comments from Z. Yang:
    Returns ln(gamma(alpha)) for alpha>0, accurate to 10 decimal places.
    Stirling's formula is used for the central polynomial part of the procedure.
    Pike MC & Hill ID (1966) Algorithm 291: Logarithm of the gamma function.
    Communications of the Association for Computing Machinery, 9:684
    """
    if alpha <= 0:
        raise ValueError
    x = alpha
    f = 0
    if x < 7:
        f = 1
        z = x
        while z < 7:
            f *= z
            z += 1
        x = z
        f = -log(f)
    z = 1 / (x * x)
    return (
        f
        + (x - 0.5) * log(x)
        - x
        + 0.918938533204673
        + (
            ((-0.000595238095238 * z + 0.000793650793651) * z - 0.002777777777778) * z
            + 0.083333333333333
        )
        / x
    )


def _incomplete_gamma(x, alpha):
    """Compute an incomplete gamma ratio (PRIVATE).

    Comments from Z. Yang::

        Returns the incomplete gamma ratio I(x,alpha) where x is the upper
               limit of the integration and alpha is the shape parameter.
        returns (-1) if in error
        ln_gamma_alpha = ln(Gamma(alpha)), is almost redundant.
        (1) series expansion     if alpha>x or x<=1
        (2) continued fraction   otherwise
        RATNEST FORTRAN by
        Bhattacharjee GP (1970) The incomplete gamma integral.  Applied Statistics,
        19: 285-287 (AS32)

    """
    p = alpha
    g = _ln_gamma_function(alpha)
    accurate = 1e-8
    overflow = 1e30
    gin = 0
    rn = 0
    a = 0
    b = 0
    an = 0
    dif = 0
    term = 0
    if x == 0:
        return 0
    if x < 0 or p <= 0:
        return -1
    factor = exp(p * log(x) - x - g)
    if x > 1 and x >= p:
        a = 1 - p
        b = a + x + 1
        term = 0
        pn = [1, x, x + 1, x * b, None, None]
        gin = pn[2] / pn[3]
    else:
        gin = 1
        term = 1
        rn = p
        while term > accurate:
            rn += 1
            term *= x / rn
            gin += term
        gin *= factor / p
        return gin
    while True:
        a += 1
        b += 2
        term += 1
        an = a * term
        for i in range(2):
            pn[i + 4] = b * pn[i + 2] - an * pn[i]
        if pn[5] != 0:
            rn = pn[4] / pn[5]
            dif = abs(gin - rn)
            if dif > accurate:
                gin = rn
            elif dif <= accurate * rn:
                break
        for i in range(4):
            pn[i] = pn[i + 2]
        if abs(pn[4]) < overflow:
            continue
        for i in range(4):
            pn[i] /= overflow
    gin = 1 - factor * gin
    return gin