File: contTimeMC.hh

package info (click to toggle)
augustus 3.5.0%2Bdfsg-5
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 777,052 kB
  • sloc: cpp: 80,066; perl: 21,491; python: 4,368; ansic: 1,244; makefile: 1,141; sh: 171; javascript: 32
file content (194 lines) | stat: -rw-r--r-- 6,562 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
/*
 * contTimeMC.hh
 * @author Mario Stanke
 * @author Stefanie König
 *
 * License: Artistic License, see file LICENSE.TXT or 
 *          https://opensource.org/licenses/artistic-license-1.0
 */

#ifndef _CONTTIMEMC_HH
#define _CONTTIMEMC_HH

#include "matrix.hh"

// standard C/C++ includes
#include <cmath>
#include <iostream>
#include <gsl/gsl_matrix.h>
#include <gsl/gsl_eigen.h>
#include <gsl/gsl_linalg.h>


using namespace std;

/**
 * @brief abstract base class for codon and exon evolution
 * 
 * @author Mario Stanke
 * @author Stefanie König
 */
class Evo {
public:
  Evo(int s) : states(s), m(0), pi(NULL) {};
    virtual ~Evo();
    int getNumStates(){return states;}

    /* Determine the branch lengths for which matrices P should be stored, at most m.
     * For trees with few species, say <10, this could be all branch lengths b occurring
     * in the tree. A value of m=-1 means to store all lengths in b. Memory may be saved
     * by building m clusters roughly representing all lengths in b.
     */
    void setBranchLengths(vector<double> b, int m=-1);
    void printBranchLengths();

    virtual void getRateMatrices() {}; // precomputes or reads the rate matrices
    virtual void computeLogPmatrices()=0; // precomputes and stores the array of matrices
    virtual void addBranchLength(double b)=0; //add new branch length b and matrix P(b)

    double getPi(int i) const {return pi[i];}  //equilibrium frequency of state i
    double getLogPi(int i) const {return log(getPi(i));}

    gsl_matrix *getSubMatrixQ(int u){ return allQs[u]; }
    
    /*
     * Returns a pointer to the probability matrix for which t comes closest.
     * u is the index to a second parameter for which the probability matrices
     * are stored for different values.
     * in the case of codon evolution u is the index to omega (dN/dS, selection)
     * in the case of exon evolution probability matrices currently only
     * depend on t and thus u is set to 0. It is however conceivable
     * to allow different rates of exon loss in future.
     */
    gsl_matrix *getSubMatrixP(int u, double t);
    gsl_matrix *getSubMatrixLogP(int u, double t); 

    /*
     * compute the matrix exponential P(t) = exp(Q*t),
     * where Q is given by U, diag(lambda) and U^{-1} as computed by eigendecompose
     */
    gsl_matrix *expQt(double t, gsl_vector *lambda, gsl_matrix *U, gsl_matrix *Uinv);

protected:
    int findClosestIndex(vector<double> &v, double val);

protected:
    const int states; //number of states (64 in codon model and (currently) 2 in exon model)
    int m; // number of branch lengths (times) for which P's are stored
    double *pi;
    vector<double> times; // sorted vector of branch lengths
    vector<gsl_matrix *> allQs; // rate matrices
    Matrix<gsl_matrix *> allPs; // Parameterized probability matrices
    Matrix<gsl_matrix *> allLogPs; // Parameterized log probability matrices

};

/*
 * perform a decompososition of the rate matrix as Q = U * diag(lambda) * U^{-1}
 * returns 0 if sucessful
 */
int eigendecompose(gsl_matrix *Q,       // input
		   double *pi,          // input, must be same as in construction of Q
		   gsl_vector *&lambda, // output, memory for lambda, U and Uinv is allocated within the function
		   gsl_matrix *&U,      // output
		   gsl_matrix *&Uinv);  // output

// eigen decomposition was successful, if Q - U * diag(lambda) * Uinv is close to the 0-matrix
void validateEigenDecomp(gsl_matrix *Q, gsl_vector *lambda, gsl_matrix *U, gsl_matrix *Uinv, int states);

void printCodonMatrix(gsl_matrix *M);

/*
 * computes the element-wise natrual logarithm of a matrix P
 */
gsl_matrix *log(gsl_matrix *P, int states);

/*
 * @brief class ExonEvo
 * @details Computes and stores 2x2 probability matrices for exon gain/loss events
 * P(t) = exp(Q(lambda, mu)*t) for a sample of values for t
 * and a fixed given value for lambda (rate of exon gain) and mu (rate of exon loss)
 * Matrices are precomputed and retrieval of P(t) is then a constant time lookup
 * 
 * @author Mario Stanke
 * @author Stefanie König
 */
class ExonEvo : public Evo{

public:
    ExonEvo(int num_states=2) : Evo(num_states), U(NULL), Uinv(NULL), l(NULL)  {
	setLambda();
	setMu();
	setAliErr();
	// mu, lambda and ali_error have to be set before calculating equilibrium frequencies Pi
	setPi();
    };
    ExonEvo(int num_states, double lambda, double mu, double ali_error)
        : Evo(num_states), U(NULL), Uinv(NULL), l(NULL)  {
	setLambda(lambda, false);
	setMu(mu, false);
	setAliErr(ali_error, false);
	// mu, lambda and ali_error have to be set before calculating equilibrium frequencies Pi
	setPi();
    };
    ~ExonEvo(){
        if (U)
            gsl_matrix_free(U);
        if (Uinv)
            gsl_matrix_free(Uinv);
        if (l)
            gsl_vector_free(l);
    }
    void setPi();
    void setLambda(double lambda = 0.0001, bool propsOverride = true);
    void setMu(double mu = 0.0001, bool propsOverride = true);
    void setAliErr(double ali_error = 0.1, bool propsOverride = true);
    double getLambda() const{return lambda;}
    double getMu() const{return mu;}
    double getAliErr() const{return ali_error;}

    void getRateMatrices(); // precomputes or reads the rate matrices
    void computeLogPmatrices();
    void addBranchLength(double b); //add new branch length b and matrix P(b)
    double minBranchLength() const {return times.front();}
    int eigendecompose(gsl_matrix *Q);
    gsl_matrix *expQt(double t) {return Evo::expQt(t,l,U,Uinv);}
    gsl_matrix *getExonRateMatrix();
    void validateEigenDecomp(gsl_matrix *Q){::validateEigenDecomp(Q,l,U,Uinv,states);}
   
private:
    double mu;            // rate for exon loss
    double lambda;        // rate for exon gain
    double ali_error;     // rate for alignment error

    // eigen decomposition of exon rate matrix Q = U * diag(l_1,...,l_n) * Uinv
    gsl_matrix *U;
    gsl_matrix *Uinv;
    gsl_vector *l;
};

/*
 * @brief class Parsimony
 * @details transitions from state i to state j are independent of the branch length
 * and cost -1 if i!=j and 0 if i==j.
 * can be used to calculate the minimum number of codon substitutions
 * for a given tree topology
 * 
 * @author Mario Stanke
 * @author Stefanie König
 */
class Parsimony : public Evo{

public:
    Parsimony() : Evo(64) {
	this->m = 1;
	this->times.push_back(1);
	this->pi = new double[64];
	for(int i=0; i<64; i++){
	    pi[i]=1;
	}
    };
    void computeLogPmatrices();
    void addBranchLength(double b){} 
};
#endif    // _CONTTIMEMC_HH