/*
File: chisqrgen_dist.c

package info (click to toggle)
theseus 3.3.0-14
links: PTS, VCS
area: main
in suites: bookworm, forky, sid, trixie
size: 91,424 kB
sloc: ansic: 41,682; makefile: 267; sh: 121
file content (225 lines) | stat: -rw-r--r-- 5,670 bytes
parent folder | download | duplicates (5)
*/
/*
    Theseus - maximum likelihood superpositioning of macromolecular structures

    Copyright (C) 2004-2015 Douglas L. Theobald

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the:

    Free Software Foundation, Inc.,
    59 Temple Place, Suite 330,
    Boston, MA  02111-1307  USA

    -/_|:|_|_\-
*/

#include <stdio.h>
#include <math.h>
#include <stdlib.h>
#include <gsl/gsl_sf_gamma.h>
#include <gsl/gsl_sf_psi.h>
//#include "DLTmath.h"
#include "statistics.h"
#include "gamma_dist.h" /* gamma_dev() */
#include "chisqrgen_dist.h"

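/* The chi^2 distribution has the form

   p(x) dx = (1/(2^(nu/2) * Gamma(nu/2))) * x^((nu - 2)/2) * exp(-x/2) dx

   0 <= x < +inf
   nu > 0

   The functions in this file evaluate a scaled version of this density,
   (1/lambda) * p(x/lambda), with scale parameter lambda.
*/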

/* Draw one random deviate from the scaled chi^2 distribution.

   nu      - chi^2 parameter (halved before being handed to gamma_dev())
   lambda  - scale factor applied to the result
   r2      - random number generator forwarded to gamma_dev()

   The draw itself is delegated to gamma_dev() (declared in gamma_dist.h),
   called as gamma_dev(1.0, nu/2, r2); the result is multiplied by 2*lambda.
*/
double
chisqrgen_dev(const double nu, const double lambda, const gsl_rng *r2)
{
    const double    half_nu = 0.5*nu;
    const double    gdev = gamma_dev(1.0, half_nu, r2);

    return(lambda * 2.0 * gdev);
}


double
chisqrgen_pdf(const double x, const double nu, const double lambda)
{
    double          p, nu2;

    if (x <= 0.0)
    {
        return(0.0);
    }
    else
    {
        nu2 = 0.5*nu;

        p = (nu2-1.0)*log(x/lambda) - 0.5*x/lambda - nu2*log(2.0) - lgamma(nu2);
        return(exp(p)/lambda);
    }
}


double
chisqrgen_lnpdf(const double x, const double nu, const double lambda)
{
    double          nu2 = 0.5*nu;

    return((nu2-1.0)*log(x/lambda) - 0.5*x/lambda - nu2*log(2.0) - lgamma(nu2) - log(lambda));
}


/* Cumulative distribution function of the scaled chi^2 distribution.

   Returns 0.0 for x <= 0; otherwise the value of GSL's
   gsl_sf_gamma_inc_P() evaluated at (nu/2, x/(2*lambda)).
*/
double
chisqrgen_cdf(const double x, const double nu, const double lambda)
{
    return((x <= 0.0) ? 0.0 : gsl_sf_gamma_inc_P(0.5*nu, 0.5*x/lambda));
}


/* Survival function of the scaled chi^2 distribution: 1 - CDF(x).

   Returns 1.0 for x <= 0.
*/
double
chisqrgen_sdf(const double x, const double nu, const double lambda)
{
    double          tail = 1.0;

    /* negated comparison so that a NaN x still goes through chisqrgen_cdf() */
    if (!(x <= 0.0))
        tail -= chisqrgen_cdf(x, nu, lambda);

    return(tail);
}


/* Probability mass of the scaled chi^2 distribution between x and y,
   computed as CDF(y) - CDF(x).  When x <= 0 the lower term is dropped
   and CDF(y) is returned directly.
*/
double
chisqrgen_int(const double x, const double y, const double nu, const double lambda)
{
    const double    upper = chisqrgen_cdf(y, nu, lambda);

    if (x <= 0.0)
        return(upper);

    return(upper - chisqrgen_cdf(x, nu, lambda));
}


/* This is from Cover and Thomas (_Elements of Information Theory_),
   but it is wrong. Something's wrong.
   I even did the integration myself and got the same answer. Weird. 
*/
/* double */
/* chisqrgen_logL(const double nu, const double lambda) */
/* { */
/*     double         logL, nu2; */
/*      */
/*     nu2 = nu / 2.0; */
/*  */
/*     logL = -log(2.0 * tgamma(nu2)) - (1.0 - nu2) * gsl_sf_psi(nu2) - nu2; */
/*  */
/*     return(logL); */
/* } */

/* Returns

       (nu/2 - 1)*digamma(nu/2) - log(2) - nu/2 - lgamma(nu/2)

   where digamma is evaluated with gsl_sf_psi().

   NOTE(review): lambda is accepted but never used.  For the scaled
   density evaluated by chisqrgen_pdf() one would expect an additional
   -log(lambda) term here -- confirm against callers before changing.
*/
double
chisqrgen_logL(const double nu, const double lambda)
{
    const double    half_nu = 0.5*nu;
    const double    psi_term = (half_nu - 1.0)*gsl_sf_psi(half_nu);

    return(psi_term - log(2.0) - half_nu - lgamma(half_nu));
}


/* For the maximum likelihood fit we must find the root of:

       F1 = (1/N)\Sum{log(x)} - log(2 lambda) - digamma(nu/2) = 0

   The first derivative with respect to nu (dF1/dnu), with lambda held
   fixed, is:

       F1' = -trigamma(nu/2)/2

   logterm supplies the (1/N)\Sum{log(x)} term.  F1 is written to *fx and
   F1' to *dfx.
*/
static void
evalchisqrgenML(const double logterm, const double nu, const double lambda, double *fx, double *dfx)
{
    const double    half_nu = 0.5*nu;

    *fx  = logterm - gsl_sf_psi(half_nu) - log(2.0 * lambda);
    *dfx = -0.5*gsl_sf_psi_1(half_nu);
}


/* Fit a chisqrgen distribution by maximum likelihood.

   data    - array of num observations; every value must be >= 0.0
   num     - number of observations; must be >= 1
   nu      - out: fitted nu
   lambda  - out: fitted lambda
   prob    - forwarded unchanged to chi_sqr_adapt()

   Method-of-moments estimates (nu = 2*ave^2/var, lambda = var/(2*ave))
   seed a Newton-Raphson iteration on nu; after every step lambda is
   re-derived from the sample mean as ave/nu.  If the iteration has not
   converged after max_iter steps, BOTH parameters fall back to the
   moment estimates.

   Returns -1.0 (after printing a message to stderr) when the input is
   rejected, otherwise the value returned by chi_sqr_adapt().
*/
double
chisqrgen_fit(const double *data, const int num, double *nu, double *lambda, double *prob)
{
    double          ave, var, logterm, fx, dfx, guess_nu, guess_lambda;
    int             i;
    const int       max_iter = 100;
    const double    tol = 1e-8;

    /* the averages below divide by num */
    if (num < 1)
    {
        fprintf(stderr, "\n ERROR: chisqrgen_fit requires at least one data point ");
        return(-1.0);
    }

    ave = 0.0;
    for (i = 0; i < num; ++i)
    {
        if (data[i] < 0.0)
        {
            fprintf(stderr, "\n ERROR345: chi^2 distributed data must be >= 0.0 ");
            return(-1.0);
        }

        ave += data[i];
    }
    ave /= (double) num;

    var = 0.0;
    for (i = 0; i < num; ++i)
        var += (data[i] - ave) * (data[i] - ave);
    var /= (double) num;

    /* method-of-moments starting point, kept for the non-convergence fallback */
    guess_nu = *nu = 2.0 * ave * ave / var;
    guess_lambda = *lambda = 0.5 * var / ave;

    /* mean of log(x); exact zeros are skipped (log(0) is -inf) but the
       sum is still divided by the full num */
    logterm = 0.0;
    for (i = 0; i < num; ++i)
    {
        if (data[i] == 0.0)
            continue;

        logterm += log(data[i]);
    }
    logterm /= (double) num;

    for (i = 0; i < max_iter; ++i)
    {
        evalchisqrgenML(logterm, *nu, *lambda, &fx, &dfx);

        if (fabs(fx) < tol)
            break; /* success */

        *nu -= (fx / dfx); /* Newton-Raphson correction */

        /* keep nu strictly positive for the next digamma evaluation */
        if (*nu <= 0.0)
            *nu = tol;

        *lambda = ave / *nu;
    }

    if (i == max_iter)
    {
        /* no convergence: restore the moment estimates for nu AND lambda so
           the pair handed to chi_sqr_adapt() is consistent */
        *nu = guess_nu;
        *lambda = guess_lambda;
    }

    return(chi_sqr_adapt(data, num, 0, prob, *nu, *lambda, chisqrgen_pdf, chisqrgen_lnpdf, chisqrgen_int));
}