File: prob_vector.h

package info (click to toggle)
phast 1.4%2Bdfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: buster
  • size: 12,412 kB
  • sloc: ansic: 54,180; makefile: 354; sh: 337; perl: 321
file content (146 lines) | stat: -rw-r--r-- 5,309 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
/***************************************************************************
 * PHAST: PHylogenetic Analysis with Space/Time models
 * Copyright (c) 2002-2005 University of California, 2006-2010 Cornell 
 * University.  All rights reserved.
 *
 * This source code is distributed under a BSD-style license.  See the
 * file LICENSE.txt for details.
 ***************************************************************************/

/** @file prob_vector.h
    Vectors representing discrete probability distributions over
    non-negative integers.  The general idea is that element x of a
    vector v (x >= 0) represents p(x).  With long-tailed distributions
    (e.g., Poisson), vectors are truncated at size x_max such that
    p(y) < epsilon for y >= x_max, where epsilon is an input parameter.
    @ingroup base
*/

#ifndef PROB_VECTOR
#define PROB_VECTOR

#include <vector.h>

/** Type of p-value calculated; selects which tail(s) of the distribution
    are used (passed as the 'side' argument of pv_p_value, pv_p_values
    and pv_cdf). */
typedef enum {LOWER, /**< Lower tail p-value, p(x <= x_0) */
 UPPER, /**< Upper tail p-value, p(x >= x_0) */ 
 TWOTAIL /**< Two tail p-value, 2 * min(lower tail, upper tail) */
} p_val_type;

/** Compute mean and variance of a probability vector.
  @param[in] p Probability vector
  @param[out] mean Mean value of p
  @param[out] var Variance of p
 */
void pv_stats(Vector *p, double *mean, double *var);

/** Compute min and max of specified confidence interval.
  @param[in] p Probability vector
  @param[in] size Size of confidence interval (between 0 and 1)
  @param[out] interval_min Computed lower bound of the confidence interval
  @param[out] interval_max Computed upper bound of the confidence interval
 */
void pv_confidence_interval(Vector *p, double size, int *interval_min, 
                            int *interval_max);
/** Compute quantiles 0.00, 0.01, 0.02, ..., 1.00 based on a probability vector.
    @param p Probability vector
    @result Array of length 101, such that element x contains the x/100th quantile.
    @note NOTE(review): ownership of the returned array is not stated in
    this header; presumably the caller is responsible for freeing it --
    confirm against the implementation.
*/
int* pv_quantiles(Vector *p);


/** Return one-sided p-value: p(x <= x_0) if side == LOWER, p(x >= x_0)
   if side == UPPER.  If side == TWOTAIL, heuristically returns 2 *
   min(p(x <= x_0), p(x >= x_0)).
   @param[in] distrib Probability vector
   @param[in] x_0 Index of distrib (between 0 and distrib->size)
   @param[in] side Type of p-value, i.e. lower, upper, or two tail
   @result p-value
   @note For drawbacks of the two-tail heuristic and discussion, see
   Dunne et al. (The Statistician, 1996).
   @note NOTE(review): x_0 is declared as a double although it is
   documented as an index; how non-integer values are treated is not
   visible in this header -- confirm against the implementation.
*/
double pv_p_value(Vector *distrib, double x_0, p_val_type side);

/** Compute one-sided p-values for array of values.
   @param[in] distrib Probability vector
   @param[in] x_0 Array of indices of distrib (between 0 and distrib->size)
   @param[in] n Number of elements in x_0
   @param[out] pvals Array of p-values corresponding to indices of x_0
   @param[in] side Type of p-value, as for pv_p_value
   (NOTE(review): whether TWOTAIL is handled the same way as in
   pv_p_value is not visible in this header -- confirm)
   @note Like pv_p_value, but saves time by computing the CDF and using
   it for all pvals.
*/
void pv_p_values(Vector *distrib, double *x_0, int n, double *pvals,
                 p_val_type side);

/** Normalize distribution.
    @param p Distribution to normalize.  NOTE(review): the function
    returns nothing, so the normalized values are presumably written
    back into p -- confirm against the implementation.
*/
void pv_normalize(Vector *p);

/** \name Convolve vector functions 
\{ */
/** Convolve distribution 'n' times (slower variant; see
    pv_convolve_fast for the faster one).
  @param p Distribution to convolve
  @param n Number of times to convolve
  @param epsilon Trim values less than epsilon off tail
  @result Convolved vector
*/
Vector *pv_convolve(Vector *p, int n, double epsilon);

/** Convolve distribution 'n' times and keep all intermediate distributions.
  @param p Distribution to convolve
  @param n Number of times to convolve
  @param epsilon Trim values less than epsilon off tail
  @result Array (q) of convolved vectors such that q[i] (1 <= i <= n) is
  the ith convolution of p; q[0] is null
*/
Vector **pv_convolve_save(Vector *p, int n, double epsilon);

/** Take convolution of a set of probability vectors.
  @param p Array of probability vectors
  @param counts (Optional) Array of multiplicities, one for each
  distribution in p; defaults to 1 per distribution.
  NOTE(review): "optional" presumably means NULL may be passed -- confirm.
  @param n NOTE(review): not documented in the original comment;
  presumably the number of probability vectors in p -- confirm against
  the implementation.
  @param epsilon Trim values less than epsilon off tail
  @result Convolved vector
 */
Vector *pv_convolve_many(Vector **p, int *counts, int n, double epsilon);

/** Convolve distribution 'n' times (faster variant of pv_convolve).
  @param p Distribution to convolve
  @param n Number of times to convolve
  @param epsilon Trim values less than epsilon off tail
  @result Convolved vector
*/
Vector *pv_convolve_fast(Vector *p, int n, double epsilon);

/** \} */

/** Compute and return a probability vector giving Pois(x | lambda), up to
   the point where the probability is < epsilon.
  @param lambda Parameter of the Poisson distribution
  @param epsilon Lowest allowed value in probability vector
  @result Probability vector
*/
Vector *pv_poisson(double lambda, double epsilon);

/** Compute CDF based on probability vector.
  @param pdf Probability vector
  @param side Type of p-value; if side == UPPER, computes
  cumulative probabilities for the right tail rather than the left
  @result CDF
*/
Vector *pv_cdf(Vector *pdf, p_val_type side);

/** Given a probability array, draw an index.
   @pre Caller must call srandom externally before drawing
   @param arr Array of probabilities to draw from
   @param n Number of elements in arr
   @result Index drawn from the probability array
 */
int pv_draw_idx_arr(double *arr, int n);

/** Given a probability vector, draw an index.
   @pre Caller must call srandom externally before drawing
   @param pdf Probability vector to draw from
   @result Index drawn from the probability vector
*/
int pv_draw_idx(Vector *pdf);

#endif