File: chi2.py

package info (click to toggle)
python-biopython 1.78%2Bdfsg-4
links: PTS, VCS
area: main
in suites: bullseye
size: 65,756 kB
sloc: python: 221,141; xml: 178,777; ansic: 13,369; sql: 1,208; makefile: 131; sh: 70
file content (136 lines) | stat: -rw-r--r-- 3,727 bytes
parent folder | download | duplicates (2)
# Copyright (C) 2011 by Brandon Invergo (b.invergo@gmail.com)
#
# This file is part of the Biopython distribution and governed by your
# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
# Please see the LICENSE file that should have been included as part of this
# package.
#
# This code is adapted (with permission) from the C source code of chi2.c,
# written by Ziheng Yang and included in the PAML software package:
# http://abacus.gene.ucl.ac.uk/software/paml.html

"""Methods to calculate p-values from a Chi-squared cumulative distribution function.

for likelihood ratio tests.
"""

from math import log, exp


def cdf_chi2(df, stat):
    """Compute p-value, from distribution function and test statistics."""
    if df < 1:
        raise ValueError("df must be at least 1")
    if stat < 0:
        raise ValueError("The test statistic must be positive")
    x = 0.5 * stat
    alpha = df / 2.0
    prob = 1 - _incomplete_gamma(x, alpha)
    return prob


def _ln_gamma_function(alpha):
    """Compute the log of the gamma function for a given alpha (PRIVATE).

    Comments from Z. Yang:
    Returns ln(gamma(alpha)) for alpha>0, accurate to 10 decimal places.
    Stirling's formula is used for the central polynomial part of the procedure.
    Pike MC & Hill ID (1966) Algorithm 291: Logarithm of the gamma function.
    Communications of the Association for Computing Machinery, 9:684
    """
    if alpha <= 0:
        raise ValueError
    x = alpha
    f = 0
    if x < 7:
        f = 1
        z = x
        while z < 7:
            f *= z
            z += 1
        x = z
        f = -log(f)
    z = 1 / (x * x)
    return (
        f
        + (x - 0.5) * log(x)
        - x
        + 0.918938533204673
        + (
            ((-0.000595238095238 * z + 0.000793650793651) * z - 0.002777777777778) * z
            + 0.083333333333333
        )
        / x
    )


def _incomplete_gamma(x, alpha):
    """Compute an incomplete gamma ratio (PRIVATE).

    Comments from Z. Yang::

        Returns the incomplete gamma ratio I(x,alpha) where x is the upper
               limit of the integration and alpha is the shape parameter.
        returns (-1) if in error
        ln_gamma_alpha = ln(Gamma(alpha)), is almost redundant.
        (1) series expansion     if alpha>x or x<=1
        (2) continued fraction   otherwise
        RATNEST FORTRAN by
        Bhattacharjee GP (1970) The incomplete gamma integral.  Applied Statistics,
        19: 285-287 (AS32)

    """
    p = alpha
    g = _ln_gamma_function(alpha)
    accurate = 1e-8
    overflow = 1e30
    gin = 0
    rn = 0
    a = 0
    b = 0
    an = 0
    dif = 0
    term = 0
    if x == 0:
        return 0
    if x < 0 or p <= 0:
        return -1
    factor = exp(p * log(x) - x - g)
    if x > 1 and x >= p:
        a = 1 - p
        b = a + x + 1
        term = 0
        pn = [1, x, x + 1, x * b, None, None]
        gin = pn[2] / pn[3]
    else:
        gin = 1
        term = 1
        rn = p
        while term > accurate:
            rn += 1
            term *= x / rn
            gin += term
        gin *= factor / p
        return gin
    while True:
        a += 1
        b += 2
        term += 1
        an = a * term
        for i in range(2):
            pn[i + 4] = b * pn[i + 2] - an * pn[i]
        if pn[5] != 0:
            rn = pn[4] / pn[5]
            dif = abs(gin - rn)
            if dif > accurate:
                gin = rn
            elif dif <= accurate * rn:
                break
        for i in range(4):
            pn[i] = pn[i + 2]
        if abs(pn[4]) < overflow:
            continue
        for i in range(4):
            pn[i] /= overflow
    gin = 1 - factor * gin
    return gin