File: pairwiseGammaDistance.cpp

package info (click to toggle)
fastml 3.11-4
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 5,772 kB
  • sloc: cpp: 48,522; perl: 3,588; ansic: 819; makefile: 386; python: 83; sh: 55
file content (158 lines) | stat: -rw-r--r-- 5,075 bytes parent folder | download | duplicates (5)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
// $Id: pairwiseGammaDistance.cpp 962 2006-11-07 15:13:34Z privmane $

#include "pairwiseGammaDistance.h"
#include "numRec.h"
#include "countTableComponent.h"
#include "likeDist.h"
#include "uniDistribution.h"
#include <cmath>

// Local utility functions
MDOUBLE pairwiseGammaDistance::giveInitialGuessOfDistance(
    const sequence& s1,
    const sequence& s2,
    const vector<MDOUBLE>  * weights,
    MDOUBLE* score) const {
    uniDistribution ud;
    stochasticProcess uniSp(&ud,_sp.getPijAccelerator());
    likeDist ld(uniSp);
    return (ld.giveDistance(s1,s2,weights,score));
}

class C_eval_gammaMLAlpha{ 
private:
    const stochasticProcess& _sp;
    const sequence& _s1;
    const sequence& _s2;
    const MDOUBLE _distance;
    const Vdouble* _weights;
    //  const VVdouble& _posteriorProb; // pos, rate
public:
    C_eval_gammaMLAlpha(const stochasticProcess& sp,
			const sequence& s1,
			const sequence& s2,
			const MDOUBLE distance,
			//		      const VVdouble& posteriorProb,
			const Vdouble  * weights):  _sp(sp),
						    _s1(s1),
						    _s2(s2),
						    _distance(distance),
						    _weights(weights) 
	//						  _posteriorProb(posteriorProb)
	{};

    // this cast is required as the distribution within the
    // stochasticProcess is kept as the parent "distribution" class that
    // knows nothing of Alpha
    void setAlpha(MDOUBLE alpha) {
	(static_cast<gammaDistribution*>(_sp.distr()))->setAlpha(alpha);
    }


    MDOUBLE operator() (MDOUBLE alpha) {
	setAlpha(alpha);
	MDOUBLE likelihood = likeDist::evalLikelihoodForDistance(_sp,_s1,_s2,_distance,_weights);
	LOG(11,<<"check alpha="<<alpha<<", bl="<<_distance<<" gives "<<likelihood<<endl);
	return -likelihood;
    };
};

// returns the best alpha for a given distance
MDOUBLE pairwiseGammaDistance::optimizeAlphaFixedDist(const sequence& s1,
						      const sequence& s2,
						      stochasticProcess & sp,
						      const MDOUBLE branchL,
						      const vector<MDOUBLE>  * weights,
						      MDOUBLE* score) const { // changes sp.
    MDOUBLE bestA=0.0;
    MDOUBLE bestQ=0.0;
    const MDOUBLE upperBoundOnAlpha = 15.0;
    const MDOUBLE epsilonAlphaOptimization = 0.01;
    const MDOUBLE cx=upperBoundOnAlpha;// left, midle, right limit on alpha
    const MDOUBLE bx=cx*0.3;
    const MDOUBLE ax=0.0;

	
    bestQ = -brent(ax,bx,cx,
		   C_eval_gammaMLAlpha(sp,s1,s2,branchL,weights),
		   epsilonAlphaOptimization,
		   &bestA);
    (static_cast<gammaDistribution*>(sp.distr()))->setAlpha(bestA);
    if (score) *score = bestQ;
    return bestA;
}

class C_evalAlphaForPairOfSeq{
private:
    const countTableComponentGam& _ctc;
    stochasticProcess& _sp;
    const MDOUBLE _branchL;
public:
    C_evalAlphaForPairOfSeq(const countTableComponentGam& ctc,
			    const MDOUBLE branchL,
			    stochasticProcess& sp):_ctc(ctc), _sp(sp), _branchL(branchL) {};

    MDOUBLE operator() (MDOUBLE alpha) {
	(static_cast<gammaDistribution*>(_sp.distr()))->setAlpha(alpha);
	C_evalLikeDist cev(_ctc,_sp);
	MDOUBLE L=cev(_branchL);
	LOG(10,<<"check alpha="<<alpha<<", bl="<<_branchL<<" gives "<<L<<endl);
	return L;
    };
};

// returns the best alpha for a given distance
MDOUBLE pairwiseGammaDistance::optimizeAlphaFixedDist(stochasticProcess & sp,
						      const countTableComponentGam & ctc,
						      const MDOUBLE branchL,
						      const vector<MDOUBLE>  * weights,
						      MDOUBLE* score) const { // changes sp.
    MDOUBLE bestA=0.0;
    MDOUBLE bestQ=0.0;
    const MDOUBLE upperBoundOnAlpha = 15.0;
    const MDOUBLE epsilonAlphaOptimization = 0.01;
    const MDOUBLE cx=upperBoundOnAlpha;// left, midle, right limit on alpha
    const MDOUBLE bx=cx*0.3;
    const MDOUBLE ax=0.0;

	
    bestQ = -brent(ax,bx,cx,
		   C_evalAlphaForPairOfSeq(ctc,branchL,sp),
		   epsilonAlphaOptimization,
		   &bestA);
    (static_cast<gammaDistribution*>(sp.distr()))->setAlpha(bestA);
    if (score) *score = bestQ;
    return bestA;
}

const MDOUBLE pairwiseGammaDistance::giveDistance(const sequence& s1,
					    const sequence& s2,
					    const vector<MDOUBLE>  * weights,
					    MDOUBLE* score,
					    MDOUBLE* alpha) const {
  
    MDOUBLE resL = 0.0;
    MDOUBLE currentDistance = giveInitialGuessOfDistance(s1,s2,weights,&resL);
	
    countTableComponentGam ctc; // from technical reasons.
	
    stochasticProcess tmpSp(_sp);
	
    const int maxIter = 30;
    MDOUBLE newDist = 0.0;
    MDOUBLE lastBestAlpha = 0.0;
    for (int i=0; i < maxIter; ++i) {
	lastBestAlpha = optimizeAlphaFixedDist(s1, s2, tmpSp, currentDistance, weights, &resL); // changes sp.
	LOG(8,<<"lastBestAlpha="<<lastBestAlpha<<"("<<"\t L="<<resL<<"\t");
	likeDist tmpld(tmpSp); // we must create a new ld, that will include the stochastic process with the new alpha
	newDist = tmpld.giveDistance(s1, s2, weights, &resL);
	LOG(8,<<"dist="<<newDist<<"(L="<<resL<<")"<<endl);
	if (fabs(newDist-currentDistance)<_toll) break;
	currentDistance = newDist;
    }
    if (score) *score = resL;
    if (alpha) *alpha = lastBestAlpha;
    assert (newDist >=0);
    return newDist;
}