1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198
|
/***************************************************************************
* PHAST: PHylogenetic Analysis with Space/Time models
* Copyright (c) 2002-2005 University of California, 2006-2010 Cornell
* University. All rights reserved.
*
* This source code is distributed under a BSD-style license. See the
* file LICENSE.txt for details.
***************************************************************************/
/** @file prob_matrix.h
Libraries and functions to operate on matrices representing discrete
bivariate probability distributions defined for non-negative integers.
General idea is element (x,y)
of matrix M (x,y >= 0) represents p(x,y). With long-tailed
distributions, matrix dimensions are truncated at size x_max, y_max
such that p(x,y) < epsilon for x >= x_max, y >= x_max, where
epsilon is an input parameter.
A stochastic matrix describes a Markov chain over a finite state space.
If the probability of moving from x to y in one time step is Pr(y | x) = Px,y, the stochastic matrix P is given by using Pi,j as the ith row and jth column element
@ingroup base
*/
#ifndef PROB_MATRIX
#define PROB_MATRIX
#include <vector.h>
#include <matrix.h>
/** Probability matrix epsilon value */
#define PM_EPS 1e-10
/** Calculate mean x & mean y for probability matrix
@param[in] p Probability matrix
@param[out] mean_x Mean of X probabilities
@param[out] mean_y Mean of Y probabilities
*/
void pm_mean(Matrix *p, double *mean_x, double *mean_y);
/** \name Marginal distribution functions
\{ */
/** Return marginal distribution for x, sum_y p(x, y) as vector
@param p Probability Matrix
@result Sum of p[x][y] grouped by x with each element in the vector being a different x
*/
Vector *pm_marg_x(Matrix *p);
/** Return marginal distribution for y, sum_x p(x, y) as vector
@param p Probability Matrix
@result Sum of p[x][y] grouped by y with each element in the vector being a different y
*/
Vector *pm_marg_y(Matrix *p);
/** Return marginal distribution for x + y, p(n) = sum_{x,y:x+y=n} p(x,y) as vector
@param p Probability Matrix
@result Sum of p[x][y] grouped by (x+y)
*/
Vector *pm_marg_tot(Matrix *p);
/** \} */
/** Normalize matrix so that all elements sum to one.
@param[in,out] p Probability Matrix
*/
void pm_normalize(Matrix *p);
/** Compute means, variances, and covariance of probability matrix.
@param[in] p Matrix containing data to analyze
@param[out] mean_x Mean of X probabilities
@param[out] mean_y Mean of Y probabilities
@param[out] var_x Variance of X probabilities
@param[out] var_y Variance of Y probabilities
@param[out] covar Covariance of probability matrix
*/
void pm_stats(Matrix *p, double *mean_x, double *mean_y, double *var_x,
double *var_y, double *covar);
/** \name Conditional Probability Distribution functions
\{ */
/** Return conditional probability distribution of x given x+y
@param p Probability Matrix
@param tot Given total, x+y
*/
Vector *pm_x_given_tot(Matrix *p, int tot);
/** Return conditional probability distribution of y given x+y
@param p Probability Matrix
@param tot Given total, x+y
*/
Vector *pm_y_given_tot(Matrix *p, int tot);
/** Return conditional probability distribution of x given x+y assuming bivariate normal
@param p Probability Matrix
@param tot Given total, x+y
@param mu_x Mean X
@param mu_y Mean Y
@param sigma_x Standard deviation X
@param sigma_y Standard deviation Y
@param rho Correlation coefficient
@note Only the region of the bivariate normal with x,y >= 0 is considered.
@note For use in central limit theorem approximations. */
Vector *pm_x_given_tot_bvn(int tot, double mu_x, double mu_y,
double sigma_x, double sigma_y, double rho);
/** Return conditional probability distribution of y given x+y assuming bivariate normal
@param p Probability Matrix
@param tot Given total, x+y
@param mu_x Mean X
@param mu_y Mean Y
@param sigma_x Standard deviation X
@param sigma_y Standard deviation Y
@param rho Correlation coefficient
@note Only the region of the bivariate normal with x,y >= 0 is considered.
@note For use in central limit theorem approximations. */
Vector *pm_y_given_tot_bvn(int tot, double mu_x, double mu_y,
double sigma_x, double sigma_y, double rho);
/** Return conditional probability distribution of x given x+y assuming independence of x and y
@param p Probability Matrix
@param tot Given total, x+y
@param mu_x Mean X
@param mu_y Mean Y
@param sigma_x Standard deviation X
@param sigma_y Standard deviation Y
@param rho Correlation coefficient
@note Computes desired conditional distribution from their marginals
*/
Vector *pm_x_given_tot_indep(int tot, Vector *marg_x, Vector *marg_y);
/** Return conditional probability distribution of y given x+y assuming independence of x and y
@param p Probability Matrix
@param tot Given total, x+y
@param mu_x Mean X
@param mu_y Mean Y
@param sigma_x Standard deviation X
@param sigma_y Standard deviation Y
@param rho Correlation coefficient
@note Computes desired conditional distribution from their marginals
*/
Vector *pm_y_given_tot_indep(int tot, Vector *marg_x, Vector *marg_y);
/** \} */
/** \name Convolution functions
\{ */
/** Convolve distribution 'n' times. (Slower)
@param p Probability Matrix
@param n Amount of times to convolve distribution
@param epsilon Trim values less than epsilon off tail
@result Convolved matrix
*/
Matrix *pm_convolve(Matrix *p, int n, double epsilon);
/** Convolve distribution n times and keep all intermediate
distributions.
@param p Probability Matrix
@param n Amount of times to convolve distribution
@param epsilon Trim values less than epsilon off tail
@result Array q such that q[i] (1 <= i <= n) is the ith convolution of p (q[0] will be NULL)
*/
Matrix **pm_convolve_save(Matrix *p, int n, double epsilon);
/** Take convolution of a set of probability matrices (slower)
@param p Array of Probability Matrices
@param counts (Optional) Array of multiplicities, one of each distribution in p; Defaults to 1 per dist.
@param n Amount of times to convolve distribution
@param epsilon Trim values less than epsilon off tail
@result Convolved matrix
*/
Matrix *pm_convolve_many(Matrix **p, int *counts, int n, double epsilon);
/** Take convolution of a set of probability matrices (Faster)
@param p Array of probability Matrices
@param n Number of times to convolve distribution
@param max_nrows Maximum number of rows of result matrix
@param max_ncols maximum number of columns of result matrix
@result Convolved matrix
@note Dos not take counts, normalize, or trim dimension
*/
Matrix *pm_convolve_many_fast(Matrix **p, int n, int max_nrows, int max_ncols);
/** Convolve distribution 'n' times. (Faster)
@param p Probability Matrix
@param n Amount of times to convolve distribution
@param epsilon Trim values less than epsilon off tail
@result Convolved matrix
*/
Matrix *pm_convolve_fast(Matrix *p, int n, double epsilon);
/** \} */
#endif
|