1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303
|
// $Id: fromQtoPt.cpp 5788 2009-01-19 22:24:16Z rubi $
#include "definitions.h"
#include "fromQtoPt.h"
#include "errorMsg.h"
#include "numRec.h"
#include "matrixUtils.h"
#include <iostream>
using namespace std;
#include <cassert>
//#define VERBOS
void q2pt::fillFromRateMatrix(const vector<MDOUBLE>& freq,
const VVdouble & qMatrix) {
// we first decompose Q to (F^0.5) M (F^-0.5)
// F is a diagonal matrix of the frequencies
// M is the symetrical matrix representation of Q.
VVdouble q_sym;
const int matrix_size = qMatrix.size();
q_sym.resize(matrix_size);
int k=0;
for (k=0; k < q_sym.size(); ++k) q_sym[k].resize(matrix_size);
calc_symmetric_q(qMatrix,q_sym,freq);
// now we have to find the eigen-vector decomposition of the q_sym.
VVdouble v; // v is the eigen vectors of the symetrical matrix.
v.resize(matrix_size);
for (k=0; k < qMatrix.size(); ++k) v[k].resize(matrix_size);
Vdouble eigenValues(matrix_size);
// symmetric_1pam = [v] [eigenValues] [transpose(v)]
//MyJacobi(q_sym,v, eigenValues); // notice that inv([v]) = [v] transpose;
/////i changed
computeEigenSystem(q_sym,v,eigenValues);
////
//#ifdef VERBOS
// LOG(5,<<"The eigen-vector matrix of the decomposition of the symetric matrix\n");
// for (int k1=0; k1 < v.size(); ++k1) {
// for (int k2=0; k2<v[k1].size(); ++k2) {
// LOG(5,<<v[k1][k2]<<" ");
// }
// LOG(5,<<endl);
// }
//#endif
VVdouble left_eig_of_pam; // v is the eigen vectors of the symetrical matrix.
left_eig_of_pam.resize(matrix_size);
for (k=0; k < left_eig_of_pam.size(); ++k) left_eig_of_pam[k].resize(matrix_size);
VVdouble right_eig_of_pam; // v is the eigen vectors of the symetrical matrix.
right_eig_of_pam.resize(matrix_size);
for (k=0; k < right_eig_of_pam.size(); ++k) right_eig_of_pam[k].resize(matrix_size);
calc_left_and_right_eig_of_pam(left_eig_of_pam,right_eig_of_pam,v,freq);
_leftEigen=left_eig_of_pam;
_rightEigen=right_eig_of_pam;
_eigenVector=eigenValues;
Vdouble _freq=freq;
// printing a pij(1);
//MDOUBLE t = 1;
//string fileName = "D://My Documents//adid//nimrod//inputs//inputs//aligned tce//aligned tce//P.F//P.F. vs P.F//eigenValues1.txt";
// ofstream out(fileName.c_str());
// for (int i=0;i<eigenValues.size();i++)
// out<<eigenValues[i] <<" ";
// out<<endl;
//for (int aa1=0; aa1 < eigenValues.size(); ++aa1) {
// for (int aa2=0; aa2 < eigenValues.size(); ++aa2) {
/// MDOUBLE sum=0;
// for (int k=0 ; k<eigenValues.size() ; ++k) {
// sum+=( left_eig_of_pam[aa1][k]*right_eig_of_pam[k][aa2]*exp(eigenValues[k]*t) );
// }
// LOG(5,<<sum<<" ");
// }
// LOG(5,<<endl);
// }
}
void q2pt::fillFrom1PAMMatrix(const vector<MDOUBLE>& freq,const VVdouble & onePam)
{
fillFromRateMatrix(freq,onePam);
for (int i=0; i < _eigenVector.size(); ++i) {
assert(_eigenVector[i]>0);
_eigenVector[i] = log(_eigenVector[i])* 100;
}
}
bool q2pt::currectFloatingPointProblems(MDOUBLE& sum) const {
if ((sum * (sum+err_allow_for_pijt_function))<0) sum=0;
if (((sum-1) * (sum-1.0-err_allow_for_pijt_function))<0) sum=1;
if (!((sum<=1) && (sum>=0)))
return false;
return true;
}
// Pij(t) = Sigma[k]{ [V]ik * [V^-1]kj * e^(Lamda_k*t) }
const MDOUBLE q2pt::Pij_t(const int i, const int j, const MDOUBLE t) const {
if (t<0) errorMsg::reportError("negative length in routine Pij_t");
// if ((_freq[i] == 0.0) || (_freq[j] == 0.0)) return 0.0;
MDOUBLE sum=0;
for (int k=0 ; k<_eigenVector.size() ; ++k) {
sum+=( _leftEigen[i][k]*_rightEigen[k][j]*exp(_eigenVector[k]*t) );
}
if (currectFloatingPointProblems(sum)) return sum;
// LOG(1,<<"err Pij_t i="<<i<<" j= "<<j<<" dis= "<<t<<" res= "<<sum<<endl);//sum is not in [0,1]
errorMsg::reportError("q2pt::Pij_t error in function pijt... ");return 0;
}
const MDOUBLE q2pt::dPij_dt(const int i,const int j, const MDOUBLE t) const {
MDOUBLE sum=0;
for (int k=0 ; k<_eigenVector.size() ; ++k) {
sum+=( _leftEigen[i][k]*_rightEigen[k][j]*exp(_eigenVector[k]*t)*_eigenVector[k]);
}
return sum;
}
const MDOUBLE q2pt::d2Pij_dt2(const int i,const int j, const MDOUBLE t) const {
MDOUBLE sum=0;;
for (int k=0 ; k<_eigenVector.size() ; ++k) {
sum+=( _leftEigen[i][k]*_rightEigen[k][j]*exp(_eigenVector[k]*t)*_eigenVector[k]*_eigenVector[k]);
}
return sum;
}
void q2pt::calc_symmetric_q(const VVdouble &q_matrix,
VVdouble &symmetric_q,
const Vdouble & freq)
//----------------------------------------------------------------------------------
//input: symmetric_1pam matrix is the output, pam1 is the input
//output: non
//doing: procedures to find eigen values work on symetrical matrices.
// dayhoff 1 pam in a new basis is symetrical
// the transformation is
//
// (1) [symmetric_1pam] = [sqrt(pi)] * [pam1] * [1/sqrt(pi)]
//
// [] for matrix. [sqrt(pi)] is a diagonal matrix were a[i][i] is the root of freq[i]
//reference: JME (1997) 45:696-703 Estimation of reversible substitution matrices from
// multiple pairs of sequences. Lars Arvestad and William J. Bruno.
//----------------------------------------------------------------------------------
{
int i,j;
for (i=0; i<q_matrix.size(); ++i) {
for (j=0; j<q_matrix.size(); ++j) {
if (q_matrix[i][j] != 0.0) {
symmetric_q[i][j] = q_matrix[i][j]*sqrt(freq[i])/sqrt(freq[j]);
}
}
}
/*check OZ
LOG(5,<<"sim matrix"<<endl);
for (i=0;i<symmetric_q.size();++i) {
for (j=0; j<symmetric_q.size(); ++j) {
//LOG(5,<<symmetric_q[i][j]<<" ");
LOG(5,<< setprecision(3) << setw(5) << symmetric_q[i][j]<<'\t');
}
LOG(5,<<endl);
} */
}
void q2pt::calc_left_and_right_eig_of_pam(
VVdouble &left_eig_of_pam,
VVdouble &right_eig_of_pam,
const VVdouble &v,
const Vdouble& freq) {
//----------------------------------------------------------------------------------
//input: left_eig_of_pam, right_eig_of_pam they will be the eigenvectors of pam1;
// freq is the vector of amino acid frequencies of the model.
// v is the eigen vector matrix of the symetrical matrix
//output: non
//doing: now [SYM] = [SqrtFreq] * [pam1] * inv([SqrtFreq])
// so [pam1] = inv([SqrtFreq]) * [SYM] * [SqrtFreq]
// SYM = [V] * [D] * transp([V])
// hence [pam1] = {inv([SqrtFreq]) * [V]} * [D] * {transp([V]) * [SqrtFreq]}
// {inv([SqrtFreq]) * [V]} is left_eig_of_pam, and the above one ^ is right.
//----------------------------------------------------------------------------------
int i,j;
for (i=0;i<v.size();++i) {
for (j=0;j<v.size();++j)
{
if ((freq[i] != 0.0) &&(freq[j] != 0.0)) {
left_eig_of_pam[i][j] = (1/sqrt(freq[i]))* v[i][j];
right_eig_of_pam[i][j]= sqrt(freq[j]) * v[j][i];
}
}
}
// LOG(5,<<"left_eig_of_pam"<<endl);
// for (i=0;i<4;++i) {
// for (j=0; j<4; ++j) {
// LOG(5,<<left_eig_of_pam[i][j]<<" ");
// LOG(5,<<pam1[i][i]<<" ");
// }
// LOG(5,<<endl);
// }
//
// LOG(5,<<"right eig_of_pam"<<endl);
// for (i=0;i<4;++i) {
// for (j=0; j<4; ++j) {
// LOG(5,<<right_eig_of_pam[i][j]<<" ");
// LOG(5,<<pam1[i][i]<<" ");
// }
// LOG(5,<<endl);
// }
//
// LOG(5,<<"press anykey"<<endl);
// char lll;
// cin>>lll;
}
VVdouble get1PamFromCountMatrix(const vector<MDOUBLE>& freq,
const VVdouble & sub_matrix){
//----------------------------------------------------------------------------------
//input: pam1 : a pointer to the matrix where pam1 will be.
// sub_matrix: the substitution matrix
// freq vector: the amino acid's frequenceis.
//output: non
//doing: fill in 1 pam from sub matrix and freq vector
//calculation: sub_matrix[a][b] is the substitution matrix, between a and b
// (sub_matrix[a][b]=sub_matrix[b][a])
// we use f[a][b] insted of sub_matrix[a][b] to be the same as the book
//(reference) "introduction to computational molecular biology by setubal and meidanis pg 80;
// let f[a] be sigma f[a][b] on all b (we made f[a][a] = 0;)
// i.e. f[a] is the number of mutation from a observed
// let f be sigma f[a] on all a; (=the total mutations*2)
// now, the mutaibility of a is defined as
//
// (1) m[a] = f[a] / (100*f*freq[a])
//
// 100*f is a scaling factor for 1 pam.
// then pam1[a][b] will be pr(a->b/a changed) * pr(a changed)
//
// (2) pam1[a][b] = (f[a][b]/f[a])*m[a]
//
// (3) f[a][a] = 1-m[a] (easy to show)
//
// notice that sigma 1pam[a][b] over all b is 1 and that
// sigma freq[a]*1pam[a][a] over all a is 0.99
//----------------------------------------------------------------------------------
const int _alphabetSize=sub_matrix.size();
VVdouble pam1;
pam1.resize(_alphabetSize);
for (int z=0; z < _alphabetSize; ++z) {
pam1[z].resize(_alphabetSize,0);
}
int i,j;//indices
MDOUBLE total=0; // i.e.f in the above explanation
for (i=0;i<_alphabetSize;++i) {
for (j=0; j<_alphabetSize; ++j){
total+=sub_matrix[i][j];
}
}
MDOUBLE tmsum;
for (i=0;i<_alphabetSize;++i) {
tmsum = 0.0;
for (j=i+1; j<_alphabetSize; ++j){
if ((freq[i] == 0.0) || (freq[j] == 0.0)) {
pam1[i][j] = 0.0;pam1[j][i] = 0.0;
} else {
pam1[i][j] = sub_matrix[i][j]/(100.0*total*freq[i]);
pam1[j][i] = sub_matrix[i][j]/(100.0*total*freq[j]);
}
}
}
for (i=0;i<_alphabetSize;++i) {
tmsum = 0.0;
for (j=0;j<_alphabetSize;++j) {
if (j!=i) tmsum += pam1[i][j];
}
if (freq[i] != 0.0) {
pam1[i][i]=1.0-tmsum;
}
}
#ifdef VERBOS
LOG(5,<<" priting the 4*4 top-left corner of the 1pam matrix * 10^6 "<<endl);
for (int a=0; a < 4; ++a) {
for (int b=0; b < 4; ++b) {
LOG(5,<<pam1[a][b]*1000000.0<<" ");
}
LOG(5,<<endl);
}
#endif
return pam1;
}
|