File: indel_mod.h

package info (click to toggle)
phast 1.5%2Bdfsg-2
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 13,008 kB
  • sloc: ansic: 54,195; makefile: 358; sh: 337; perl: 321
file content (238 lines) | stat: -rw-r--r-- 9,997 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
/***************************************************************************
 * PHAST: PHylogenetic Analysis with Space/Time models
 * Copyright (c) 2002-2005 University of California, 2006-2010 Cornell 
 * University.  All rights reserved.
 *
 * This source code is distributed under a BSD-style license.  See the
 * file LICENSE.txt for details.
 ***************************************************************************/


/**
  @file indel_mod.h
  Simple model of insertions and deletions, assumes given indel history 
  @ingroup phylo
*/

#ifndef INDMOD_H
#define INDMOD_H

#include <indel_history.h>

/** Number of indel states i.e. insert, delete, base */
#define NINDEL_STATES 3
//need to make sure ERROR isn't already defined from R libraries
#undef ERROR
/** Possible indel results when comparing two sites. */
typedef enum {MATCH,  /**< Parent and child both 'base' at same site. */ 
	      CHILDINS, /**< Parent has 'insertion' while child has 'base' at same site. */
	      CHILDDEL, /**< Parent has 'base' while child has 'insertion' at same site. */
	      SKIP,   /**< Parent and child both have ('insertion' at same site) or ('deletion' at same site). */
	      ERROR  /**< There was an error in comparing sites between parent and child. */
	     } col_type;

/** Indel model for a single branch */
typedef struct {
  double alpha; /**< Rate of insertion per expected substitution per site for this branch */
  double beta; /**< Rate of deletion per expected substitution per site for this branch */
  double tau; /**< Roughly equal to the inverse of the expected indel length (modulo
    adjustments required to make probabilities sum to one) for this branch */
  double t; /** ??? */
  MarkovMatrix *probs; /**< Indel substitution pattern for this branch*/
  Matrix *log_probs;  /**< Log of substitution pattern probabilities for this branch*/
  Vector *beg_probs;  /**< Beginning probabilities for this branch */
  Vector *beg_log_probs; /**< Log of beg_probs for this branch */
} BranchIndelModel;

/** Indel model for a dataset */
typedef struct {
  double alpha; /**< Overall rate of insertion per expected substitution per site */
  double beta; /**< Overall rate of deletion per expected substitution per site */
  double tau;  /**< Roughly equal to the inverse of the expected indel length (modulo 
    adjustments required to make probabilities sum to one) for this branch */
  double training_lnl; /**< Log likelihood from performing training (if any) */
  TreeNode *tree; /**< Tree representing structure of dataset */
  BranchIndelModel **branch_mods; /**< Indel models for each branch */
} IndelModel;

/** Model sufficient statistics for per branch */
typedef struct {
  Matrix *trans_counts; /**< Transition counts */
  Vector *beg_counts;   /**< Beginning counts */
} BranchIndelSuffStats;

/** Model sufficient statistics per dataset */
typedef struct {
  TreeNode *tree; /**< Tree structure */
  BranchIndelSuffStats **branch_counts; /**< List of model statistics per branch */
} IndelSuffStats;

/** \name Indel (Branch) Model allocation functions 
 \{ */

/** Create new Indel model for a branch 
    @param alpha Rate of insertion per expected substitution per site
    @param beta Rate of deletion per expected substitution per site 
    @param tau Roughly equal to the inverse of the expected indel length 
               (modulo adjustments required to make probabilities sum to one) for this branch 
    @param t t
    @result New branch indel model
*/
BranchIndelModel *im_new_branch(double alpha, double beta, double tau,
                                double t);

/** Create new Indel model containing all branches in a tree, each initialized to the exact same alpha, beta, tau.
    @param alpha Rate of insertion per expected substitution per site
    @param beta Rate of deletion per expected substitution per site
    @param tau Roughly equal to the inverse of the expected indel length 
               (modulo adjustments required to make probabilities sum to one) for this branch 
    @param tree Tree structure
    @result New branch indel model
*/
IndelModel *im_new_all(double alpha, double beta, double tau,  TreeNode *tree);

/** Create new Indel model containing all branches in a tree, each with its own alpha, beta, tau.
    @param alpha Rates of insertion per expected substitution per site
    @param beta Rates of deletion per expected substitution per site 
    @param tau Roughly equal to the inverse of the expected indel length 
               (modulo adjustments required to make probabilities sum to one) for this branch 
    @param tree Tree structure
    @result New branch indel model
*/
IndelModel *im_new(double *alpha, double *beta, double *tau, 
                   TreeNode *tree);

/** \name Indel (Branch) Model cleanup functions 
 \{ */

/**  Free a branch indel model. 
     @param bim Branch Model to free
*/
void im_free_branch(BranchIndelModel *bim);

/** Free a indel model.
    @param im Indel Model to free
*/
void im_free(IndelModel *im);

/** Free a statistics object. 
    @param iss Statistics object to free
*/
void im_free_suff_stats(IndelSuffStats *iss);


/** \name Indel (Branch) Model modification functions 
 \{ */

/**  Set values of alpha, beta, tau, t for a branch indel model
     @param bim Branch Indel Model to modify
     @param alpha rate of insertion per expected substitution per site
     @param beta rate of deletion per expected substitution per site 
     @param tau Roughly equal to the inverse of the expected indel length 
               (modulo adjustments required to make probabilities sum to one) for this branch 
*/
void im_set_branch(BranchIndelModel *bim, double alpha, double beta, 
                   double tau, double t);

/**  Set values of alpha, beta, tau, t for EVERY branch in indel model
    @param im Indel Model to modify
    @param alpha Rate of insertion per expected substitution per site
    @param beta Rate of deletion per expected substitution per site 
    @param tau Roughly equal to the inverse of the expected indel length 
               (modulo adjustments required to make probabilities sum to one) for this branch 
    @param tree Tree structure
*/
void im_set_all(IndelModel *im, double alpha, double beta, double tau, 
                TreeNode *tree);

/**  Set per branch values of alpha, beta, tau, tree for EVERY branch in indel model
    @param im Indel Model to modify
    @param alpha Rates of insertion per expected substitution per site
    @param beta Rates of deletion per expected substitution per site 
    @param tau Roughly equal to the inverse of the expected indel length 
               (modulo adjustments required to make probabilities sum to one) for this branch 
    @param tree Tree structure
*/
void im_set(IndelModel *im, double *alpha, double *beta, double *tau, 
            TreeNode *tree);

/** \name Indel (Branch) Model Log Likelihood functions 
 \{ */

/** Calculate log likelihood for a branch.
    @param[in] ih Indel History
    @param[in] bim Branch to compute log likelihood for
    @param[in] child ID of child within branch
    @param[out] col_logl Computed log likelihoods per column in branch
    @result Log likelihood for entire branch
 */
double im_branch_column_logl(IndelHistory *ih, BranchIndelModel *bim, 
                             int child_id, double *col_logl);

/** Calculate total log likelihood.
    @param[in] ih Indel History
    @param[in] im Indel Model to get log likelihood for
    @param[out] col_logl Computed log likelihoods for each branch
    @result Column log likelihood
*/
double im_column_logl(IndelHistory *ih, IndelModel *im, double *col_logl);

/** Calculate total log likelihood.
    @param[in] im Indel Model to get log likelihood for
    @param[in] iss Indel Sufficient Statistics
    @result Tree log likelihood
*/
double im_likelihood(IndelModel *im, IndelSuffStats *iss);

/** \name Indel (Branch) Model sufficient statistics functions
 \{ */

/** Create Branch Indel Sufficient Statistics.
    @param ih Indel History to calculate statistics on
    @param child_id ID of first node of branch to get statistics for in tree
    @result Branch indel sufficient statistics
*/
BranchIndelSuffStats *im_suff_stats_branch(IndelHistory *ih, int child_id);

/** Create Indel Sufficient Statistics.
    @param ih Indel History to create statistics for
    @result Indel sufficient statistics
*/
IndelSuffStats *im_suff_stats(IndelHistory *ih);


/* Not implemented 
double im_simulate_history(IndelModel *tim, int ncols);
*/


/** Collect sufficient statistics for a branch, considering only sites in the specified category.
    @param ih Indel History to calculate Sufficient Statistics for
    @param child_id First node of branch to obtain sufficient statistics for
    @param categories Integers specifying which category each column is in
    @param do_cat Integer specifying which category is accepted 
    @result Collected sufficient statistics for the branch specified
*/
BranchIndelSuffStats *im_suff_stats_branch_cat(IndelHistory *ih, int child_id,
                                               int *categories, int do_cat);

/** Collect sufficient statistics for an entire tree, considering only sites in the specified category.
    @param ih Indel History to calculate Sufficient Statistics for
    @param categories Integers specifying which category each column is in
    @param do_cat Integer specifying which category is accepted 
    @result Collected sufficient statistics for the tree specified
*/
IndelSuffStats *im_suff_stats_cat(IndelHistory *ih, int *categories, 
                                  int do_cat);
/** \} */

/** Estimate alpha, beta, and tau from an indel history by maximum likelihood.
    @param im Indel Model 
    @param ih Indel History
    @param ss Indel Sufficient Statistics
    @param logf Log file to write to
*/
void im_estimate(IndelModel *im, IndelHistory *ih, IndelSuffStats *ss, 
                 FILE *logf);

#endif