1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228
|
// $Id: likeDistfixRoot.h 4470 2008-07-17 15:37:40Z cohenofi $
#ifndef ___LIKE_DIST_H_GL_FIX_ROOT
#define ___LIKE_DIST_H_GL_FIX_ROOT
#include "definitions.h"
#include "countTableComponent.h"
#include "distanceMethod.h"
#include "stochasticProcess.h"
#include "logFile.h"
#include "jcDistance.h"
#include "sequenceContainer.h"
#include "unObservableData.h"
#include <cmath>
using namespace std;
class likeDistfixRoot : public distanceMethod {
public:
// WARNING: the stochasticProcess is NOT copied. The same object is used
explicit likeDistfixRoot(const stochasticProcess& sp,
const MDOUBLE toll =0.0001,
const MDOUBLE maxPairwiseDistance = 5.0,
const MDOUBLE minPairwiseDistance = 0.0000001,
unObservableData* unObservableData_p=NULL)
: _sp(sp),_nonConstSpPtr(NULL),_toll(toll),_maxPairwiseDistance(maxPairwiseDistance),_minPairwiseDistance(minPairwiseDistance),_unObservableData_p(unObservableData_p) {}
likeDistfixRoot(const likeDistfixRoot& other)
: _sp(other._sp),_nonConstSpPtr(other._nonConstSpPtr),_toll(other._toll),_maxPairwiseDistance(other._maxPairwiseDistance),_minPairwiseDistance(other._minPairwiseDistance),_jcDist(other._jcDist) {}
virtual likeDistfixRoot* clone() const {return new likeDistfixRoot(*this);}
// This constructor allows non-const stochasticProcess so that likeDistfixRoot will be able to change alpha, etc.
explicit likeDistfixRoot(stochasticProcess& sp,
const MDOUBLE toll =0.0001,
const MDOUBLE maxPairwiseDistance = 5.0,
const MDOUBLE minPairwiseDistance = 0.0000001)
: _sp(sp),_nonConstSpPtr(&sp),_toll(toll),_maxPairwiseDistance(maxPairwiseDistance),_minPairwiseDistance(minPairwiseDistance) {}
// THIS FUNCTION DOES NOT RETURN THE LOG LIKELIHOOD IN RESQ, BUT RATHER "Q", THE CONTRIBUTION of this edge
// TO THE EXPECTED LOG-LIKELIHOOD (SEE SEMPHY PAPER).
// NEVERTHELESS, THE t that optimizes Q is the same t that optimizes log-likelihood.
const MDOUBLE giveDistance(const vector<countTableComponentGam>& ctc,
MDOUBLE& resQ,
const MDOUBLE initialGuess= 0.03) const; // initial guess
// given two sequences, it evaluates the log likelihood.
MDOUBLE evalLogLikelihoodGivenDistance(const sequence& s1,
const sequence& s2,
const MDOUBLE dis2evaluate);
// returns the estimated ML distance between the 2 sequences.
// if score is given, it will be the log-likelihood.
const MDOUBLE giveDistance(const sequence& s1,
const sequence& s2,
const vector<MDOUBLE> * weights,
MDOUBLE* score=NULL) const;
// this function creates a countTableComponent (ctc) from the two sequences.
// it then computes the distance from this ctc.
// THIS FUNCTION DOES NOT RETURN THE LOG LIKELIHOOD IN score, BUT RATHER "Q", THE CONTRIBUTION of this edge
// TO THE EXPECTED LOG-LIKELIHOOD (SEE SEMPHY PAPER).
// NEVERTHELESS, THE t that optimizes Q is the same t that optimizes log-likelihood.
//MDOUBLE giveDistanceThroughCTC(const sequence& s1,
// const sequence& s2,
// const vector<MDOUBLE> * weights,
// MDOUBLE* score=NULL) const;
const MDOUBLE giveLikelihood(const sequence& s1,
const sequence& s2,
MDOUBLE distance,
const vector<MDOUBLE> * weights=NULL) const;
// return the stochasticProcess
const stochasticProcess& getStochasticProcess() const {return _sp;}
stochasticProcess& getNonConstStochasticProcess();
bool isTheInternalStochasticProcessConst() const {return !_nonConstSpPtr;}
MDOUBLE getToll() const {return _toll;}
MDOUBLE getMaxPairwiseDistance() const {return _maxPairwiseDistance;}
MDOUBLE getMinPairwiseDistance() const {return _minPairwiseDistance;}
protected:
const stochasticProcess &_sp;
stochasticProcess *_nonConstSpPtr;
const MDOUBLE _toll;
const MDOUBLE _maxPairwiseDistance;
const MDOUBLE _minPairwiseDistance;
jcDistance _jcDist;
unObservableData* _unObservableData_p;
private:
const MDOUBLE giveDistanceBrent( const vector<countTableComponentGam>& ctc,
MDOUBLE& resL,
const MDOUBLE initialGuess= 0.03) const; // initial guess
const MDOUBLE giveDistanceNR( const countTableComponentGam& ctc,
MDOUBLE& resL,
const MDOUBLE initialGuess= 0.03) const; // initial guess
public:
static MDOUBLE evalLikelihoodForDistance(const stochasticProcess& sp,
const sequence& s1,
const sequence& s2,
const MDOUBLE dist,
const vector<MDOUBLE> * weights=NULL);
};
class C_evallikeDistfixRoot{
private:
const vector<countTableComponentGam>& _ctc;
const stochasticProcess& _sp;
unObservableData* _unObservableData_p;
public:
C_evallikeDistfixRoot(const vector<countTableComponentGam>& ctc, // ctc[letterAtRoot][rate][alph][alph]
const stochasticProcess& inS1, unObservableData* unObservableData_p=NULL)
:_ctc(ctc), _sp(inS1),_unObservableData_p(unObservableData_p) {};
MDOUBLE operator() (MDOUBLE dist)
{
//if(_plogLforMissingData){
// sequenceContainer scZero;
// gainLossAlphabet alph;
// scZero.startZeroSequenceContainerGL(_sc, alph);
// *_plogLforMissingData = likelihoodComputation::getTreeLikelihoodAllPosAlphTheSame(_et,scZero,*_sp);
//}
const MDOUBLE epsilonPIJ = 1e-10;
MDOUBLE sumL=0.0;
for (int letterAtRoot = 0; letterAtRoot < _sp.alphabetSize(); ++letterAtRoot){
for (int alph1=0; alph1 < _sp.alphabetSize(); ++alph1){
for (int alph2=0; alph2 < _sp.alphabetSize(); ++alph2){
for (int rateCategor = 0; rateCategor<_sp.categories(); ++rateCategor) {
MDOUBLE rate = _sp.rates(rateCategor);
MDOUBLE pij= _sp.Pij_t(alph1,alph2,dist*rate);
if (pij<epsilonPIJ) pij = epsilonPIJ;//SEE REMARK (1) FOR EXPLANATION
sumL += _ctc[letterAtRoot].getCounts(alph1,alph2,rateCategor)
//*_sp.freq(letterAtRoot)
//*(log(pij)-log(_sp.freq(letterAtRoot))) ;
//*_sp.freq(letterAtRoot)
*(log(pij)-log(_sp.freq(alph2))) ;
}
}
}
}
//if(_unObservableData_p)
// sumL = sumL/(1- exp(_unObservableData_p->getlogLforMissingData())); // need to find an efficient way to update LofMissingData with dist
LOG(8,<<"check bl="<<dist<<" gives sumL "<<sumL<<endl);
return -sumL;
};
};
// REMARK 1: THE LINE if if (pij<epsilonPIJ) pij = epsilonPIJ
// There are cases when i != j, and t!=0, and yet pij =0, because of numerical problems
// For these cases, it is easier to assume pij is very small, so that log-pij don't fly...
class C_evalLikeDist_dfixRoot{ // derivative.
public:
C_evalLikeDist_dfixRoot(const vector<countTableComponentGam>& ctc,
const stochasticProcess& inS1) : _ctc(ctc), _sp(inS1) {};
private:
const vector<countTableComponentGam>& _ctc;
const stochasticProcess& _sp;
public:
MDOUBLE operator() (MDOUBLE dist) {
MDOUBLE sumDL=0.0;
for (int letterAtRoot = 0; letterAtRoot < _sp.alphabetSize(); ++letterAtRoot){
for (int alph1=0; alph1 < _ctc[letterAtRoot].alphabetSize(); ++alph1){
for (int alph2=0; alph2 < _ctc[letterAtRoot][alph1].alphabetSize(); ++alph2){
for (int rateCategor = 0; rateCategor<_sp.categories(); ++rateCategor) {
MDOUBLE rate = _sp.rates(rateCategor);
MDOUBLE pij= _sp.Pij_t(alph1,alph2,dist*rate);
MDOUBLE dpij = _sp.dPij_dt(alph1,alph2,dist*rate);
//cout<<letterAtRoot<<"\n";
//cout<<alph1<<"\n";
//cout<<alph2<<"\n";
//cout<<rateCategor<<"\n";
//cout<<rate<<"\n";
//cout<<_ctc[letterAtRoot].getCounts(alph1,alph2,rateCategor)<<"\n";
sumDL+= _ctc[letterAtRoot].getCounts(alph1,alph2,rateCategor)*dpij
//*_sp.freq(letterAtRoot)
*rate/pij ;
}
}
}//cerr<<"derivation = "<<-sumDL<<endl;
}
LOG(8,<<"check bl="<<dist<<" gives sumDL "<<sumDL<<endl);
return -sumDL;
};
};
//////////////////////////////////////////////////////////////////////////
class C_evalLikeDist_d2GLfixRoot{ // second derivative.
public:
C_evalLikeDist_d2GLfixRoot(const countTableComponentGam& ctc,
const stochasticProcess& inS1) : _ctc(ctc), _sp(inS1) {};
private:
const countTableComponentGam& _ctc;
const stochasticProcess& _sp;
public:
MDOUBLE operator() (MDOUBLE dist) {
MDOUBLE sumDL=0.0;
for (int alph1=0; alph1 < _ctc.alphabetSize(); ++alph1){
for (int alph2=0; alph2 < _ctc.alphabetSize(); ++alph2){
for (int rateCategor = 0; rateCategor<_sp.categories(); ++rateCategor) {
MDOUBLE rate = _sp.rates(rateCategor);
MDOUBLE pij= _sp.Pij_t(alph1,alph2,dist*rate);
MDOUBLE dpij = _sp.dPij_dt(alph1,alph2,dist*rate);
MDOUBLE d2pij = _sp.d2Pij_dt2(alph1,alph2,dist*rate);
sumDL+= rate*_ctc.getCounts(alph1,alph2,rateCategor)*
(pij*d2pij - dpij *dpij )/(pij*pij);
}
}
}
return -sumDL;
};
};
#endif
|