1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254
|
/***************************************************************************
* Copyright (C) 2009 by BUI Quang Minh *
* minh.bui@univie.ac.at *
* *
* This program is free software; you can redistribute it and/or modify *
* it under the terms of the GNU General Public License as published by *
* the Free Software Foundation; either version 2 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU General Public License for more details. *
* *
* You should have received a copy of the GNU General Public License *
* along with this program; if not, write to the *
* Free Software Foundation, Inc., *
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. *
***************************************************************************/
#include "maalignment.h"
void MaAlignment::readLogLL(char *fileName)
{
//First read the values from inFile to a DoubleVector
DoubleVector _logllVec;
int siteNum = -1;
string currentString;
cout << "\nReading file containing site's loglikelihood: " << fileName << "...." << endl;
ifstream inFile;
try{
inFile.exceptions (ios::failbit | ios::badbit);
inFile.open(fileName);
/**really start reading*/
//read number of sites
inFile >> currentString;
siteNum = convert_int(currentString.c_str());
//ignore "Site_Lh"
inFile >> currentString;
while (!inFile.eof())
{
//reading each line of the file
//remove the badbit
inFile.exceptions (ios::badbit);
if ( !(inFile >> currentString) ) break;
//set the failbit again
inFile.exceptions (ios::failbit | ios::badbit);
_logllVec.push_back(convert_double(currentString.c_str()));
}/**finish reading*/
inFile.clear();
inFile.exceptions (ios::failbit | ios::badbit);
inFile.close();
} catch(bad_alloc){
outError(ERR_NO_MEMORY);
} catch (const char *str){
outError(str);
} catch (string str){
outError(str);
} catch (ios::failure){
outError(ERR_READ_INPUT);
} catch (...){
outError(ERR_READ_ANY);
}
if (siteNum != _logllVec.size())
outError("Actual number of site's likelihoods is not consistent with the announced number in the first line.");
cout << "Finish reading, now assign the logLL to the pattern:" << endl;
logLL.resize(getNPattern(),0.0);
for (int i = 0; i < siteNum; i++)
{
int patIndex = getPatternID(i);
if ( logLL[patIndex] == 0 )
logLL[patIndex] = _logllVec[i];
else
if ( logLL[patIndex] != _logllVec[i] )
// outError("Conflicting between the likelihoods reported for pattern", (*this)[i]);
outError("Conflicting between the likelihoods reported for pattern");
}
// int npat = getNPattern();
// cout << "Number of patterns: " << npat << endl;
// for ( int j = 0; j < npat; j++ )
// cout << j << "\t" << at(j) << "\t" << logLL[j] << endl;
cout << "Finish assigning logLL to the patterns!" << endl;
}
IntVector MaAlignment::computeExpectedNorFre()
{
IntVector expectedNorFre;
if ( logLL.empty())
outError("Error: log likelihood of patterns are not given!");
int patNum = getNPattern();
int alignLen = getNSite();
//resize the expectedNorFre vector
expectedNorFre.resize(patNum,-1);
//Vector containing the likelihood of the pattern p_i
DoubleVector LL(patNum,-1.0);
double sumLL = 0; //sum of the likelihood of the patterns in the alignment
//Compute the likelihood from the logLL
for ( int i = 0; i < patNum; i++ )
{
LL[i] = exp(logLL[i]);
sumLL += LL[i];
}
//Vector containing l_i = p_i*ell/sum_i(p_i)
DoubleVector ell(patNum, -1.0);
//Compute l_i
for ( int i = 0; i < patNum; i++ )
{
ell[i] = (double)alignLen * LL[i] / sumLL;
}
//Vector containing r_i where r_0 = ell_0; r_{i+1} = ell_{i+1} + r_i - ordinaryRounding(r_i)
DoubleVector r(patNum, -1.0);
//Compute r_i and the expected normalized frequencies
r[0] = ell[0];
expectedNorFre[0] = (int)floor(ell[0]+0.5); //note that floor(_number+0.5) returns the ordinary rounding of _number
int sum = expectedNorFre[0];
for (int j = 1; j < patNum; j++ )
{
r[j] = ell[j] + r[j-1] - floor(r[j-1]+0.5);
expectedNorFre[j] = (int)floor(r[j]+0.5);
sum += expectedNorFre[j];
}
//cout << "Number of patterns: " << patNum << ", sum of expected sites: " << sum << endl;
return expectedNorFre;
}
void MaAlignment::printPatObsExpFre(const char *fileName)
{
IntVector expectedNorFre = computeExpectedNorFre();
printPatObsExpFre(fileName, expectedNorFre);
}
void MaAlignment::printPatObsExpFre(const char *fileName, const IntVector expectedNorFre)
{
try {
ofstream out;
out.exceptions(ios::failbit | ios::badbit);
out.open(fileName);
out << "Pattern\tLogLL\tObservedFre\tExpectedFre" << endl;
int patNum = getNPattern();
int seqNum = getNSeq();
int seqID;
for ( int i = 0; i < patNum; i++ )
{
for ( seqID = 0; seqID < seqNum; seqID++ ){
out << convertStateBackStr(at(i)[seqID]);
}
out << "\t" << logLL[i] << "\t" << (*this)[i].frequency << "\t" << expectedNorFre[i] << endl;
}
out.close();
} catch (ios::failure) {
outError(ERR_WRITE_OUTPUT, fileName);
}
}
void MaAlignment::generateExpectedAlignment(MaAlignment *aln, double &prob)
{
//cout << "In function: generating expected alignment!" << endl;
IntVector expectedNorFre = aln->computeExpectedNorFre();
int nsite = aln->getNSite();
seq_names.insert(seq_names.begin(), aln->seq_names.begin(), aln->seq_names.end());
num_states = aln->num_states;
site_pattern.resize(nsite, -1);
clear();
pattern_index.clear();
VerboseMode save_mode = verbose_mode;
verbose_mode = min(verbose_mode, VB_MIN); // to avoid printing gappy sites in addPattern
int patID;
int site = 0;
int npat = aln->getNPattern();
double sumFac = 0;
double sumProb = 0;
double fac = logFac(nsite);
double sumFacMax = 0;
double sumProbMax = 0;
for (patID = 0; patID < npat; patID++) {
int patFre = expectedNorFre[patID];
for ( int patSite = 0; patSite < patFre; patSite++)
{
Pattern pat = aln->at(patID);
addPattern(pat,site);
site++;
}
//to compute the probability of the new alignment given the multinomial distribution
sumFac += logFac(patFre);
sumProb += (double)patFre*log((double)aln->at(patID).frequency/(double)nsite);
//for the unconstraint maximum log likelihood
sumFacMax += logFac(aln->at(patID).frequency);
sumProbMax += (double)aln->at(patID).frequency*log((double)aln->at(patID).frequency/(double)nsite);
}
prob = fac - sumFac + sumProb;
double probMax = fac - sumFacMax + sumProbMax;
// cout << "total number of sites: " << site << endl;
verbose_mode = save_mode;
countConstSite();
//cout << "Finish generating expected alignment!" << endl;
cout << "Logarithm of the probability of the new alignment given the multinomial distribution of the input alignment is: " << prob << endl;
cout << "Maximum unconstraint (log) likelihood of the input alignment: " << probMax << endl;
// cout << "Maximum unconstraint likelihood: " << exp(probMax) << endl;
}
/*void MaAlignment::multinomialProb(Alignment objectAlign, double &prob)
{
cout << "Computing the multinomial probability of an object alignment given a reference alignment ..." << endl;
//should we check for compatibility of sequence's names and sequence's order in THIS alignment and in the objectAlign??
//check alignment length
int nsite = getNSite();
assert(nsite == objectAlign.getNSite());
double sumFac = 0;
double sumProb = 0;
double fac = logFac(nsite);
int index;
for ( Alignment::iterator objectIt = objectAlign.begin(); objectIt != objectAlign.end() ; objectIt++)
{
PatternIntMap::iterator pat_it = pattern_index.find((*objectIt));
if ( pat_it == pattern_index.end() ) //not found ==> error
outError("Pattern in the object alignment is not found in the reference alignment!");
sumFac += logFac((*objectIt).frequency);
index = pat_it->second;
sumProb += (double)(*objectIt).frequency*log((double)at(index).frequency/(double)nsite);
}
prob = fac - sumFac + sumProb;
}*/
/*void MaAlignment::multinomialProb(AlignmentVector objectAligns, DoubleVector &probs)
{
int num = objectAligns.size();
double curProb;
if (num > 0)
{
probs.resize(num,0);
for ( int i = 0; i < num; i++ )
{
(*this).multinomialProb(objectAligns[i], curProb);
probs[i] = curProb;
}
}
}*/
|