File: givenRatesMLDistance.cpp

package info (click to toggle)
fastml 3.11-4
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 5,772 kB
  • sloc: cpp: 48,522; perl: 3,588; ansic: 819; makefile: 386; python: 83; sh: 55
file content (139 lines) | stat: -rw-r--r-- 4,879 bytes parent folder | download | duplicates (5)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
// 	$Id: givenRatesMLDistance.cpp 962 2006-11-07 15:13:34Z privmane $	
#include "givenRatesMLDistance.h"
#include "numRec.h"

class C_eval_likelihoodOfDistanceGivenRates{ 
private:
    const stochasticProcess& _sp;
    const sequence& _s1;
    const sequence& _s2;
    const Vdouble& _rates;
    const Vdouble* _weights;

public:
    C_eval_likelihoodOfDistanceGivenRates(const stochasticProcess& sp,
					  const sequence& s1,
					  const sequence& s2,
					  const Vdouble& rates,
					  const Vdouble  * weights)
	:  _sp(sp),_s1(s1),_s2(s2),_rates(rates),_weights(weights)
	{};

    MDOUBLE operator() (MDOUBLE dist) const {
	MDOUBLE sumL=0.0; // sum of log likelihoods
	MDOUBLE posLikelihood = 0.0; // likelihood of a specific position
	for (int pos=0; pos < _s1.seqLen(); ++pos){
	    if (_s1.isUnknown(pos) && _s2.isUnknown(pos)) continue; // the case of two unknowns
	    posLikelihood = 0.0;
	    if (_s1.isUnknown(pos) && _s2.isSpecific(pos)) { 
		// this is the more complicated case, where _s1 = ?, _s2 = specific
		posLikelihood = _sp.freq(_s2[pos]);
	    } else if (_s2.isUnknown(pos) && _s1.isSpecific(pos)) {
		posLikelihood = _sp.freq(_s1[pos]);
	    } else {
		MDOUBLE rate = _rates[pos];
		MDOUBLE pij= 0.0;
		if (_s1.isSpecific(pos) && _s2.isSpecific(pos)) {
		    // the simple case, where AA i is changing to AA j
		    pij= _sp.Pij_t(_s1[pos],_s2[pos],dist*rate);
		    posLikelihood += pij * _sp.freq(_s1[pos]);
		} else {// this is the most complicated case, when you have
		    // combinations of letters, for example B in one
		    // sequence and ? in the other.
		    for (int iS1 =0; iS1< _sp.alphabetSize(); ++iS1) {
			for (int iS2 =0; iS2< _sp.alphabetSize(); ++iS2) {
			    if ((_s1.getAlphabet()->relations(_s1[pos],iS1)) &&
				(_s2.getAlphabet()->relations(_s2[pos],iS2))) {
				posLikelihood += _sp.freq(iS1)*_sp.Pij_t(iS1,iS2,dist*rate);
			    }
			}
		    }
		}
	    }
	    assert(posLikelihood>0.0);
	    sumL += log(posLikelihood)*(_weights ? (*_weights)[pos]:1.0);
	}
	return -sumL;
    };
};

class C_eval_likelihoodOfDistanceGivenRates_d{ // derivative.
private:
    const stochasticProcess& _sp;
    const sequence& _s1;
    const sequence& _s2;
    const Vdouble& _rates;
    const Vdouble* _weights;

public:
    C_eval_likelihoodOfDistanceGivenRates_d(const stochasticProcess& sp,
					  const sequence& s1,
					  const sequence& s2,
					  const Vdouble& rates,
					  const Vdouble  * weights)
	:  _sp(sp),_s1(s1),_s2(s2),_rates(rates),_weights(weights)
	{};

    MDOUBLE operator() (MDOUBLE dist) const {
	MDOUBLE sumL=0.0; // sum of log likelihoods
	MDOUBLE posLikelihood = 0.0; // likelihood of a specific position
	MDOUBLE posLikelihood_d = 0.0; // derivative of the likelihood at a specific position
	for (int pos=0; pos < _s1.seqLen(); ++pos){
	    if (_s1.isUnknown(pos) && _s2.isUnknown(pos)) continue; // the case of two unknowns
	    posLikelihood = 0.0;
	    posLikelihood_d = 0.0;
	    if (_s1.isUnknown(pos) && _s2.isSpecific(pos)) { 
		// this is the more complicated case, where _s1 = ?, _s2 = specific
		posLikelihood = _sp.freq(_s2[pos]);
		posLikelihood_d =0.0;
	    } else if (_s2.isUnknown(pos) && _s1.isSpecific(pos)) {
		posLikelihood = _sp.freq(_s1[pos]);
		posLikelihood_d =0.0;
	    } else {
		MDOUBLE rate = _rates[pos];
		MDOUBLE pij= 0.0;
		MDOUBLE dpij=0.0;
		if (_s1.isSpecific(pos) && _s2.isSpecific(pos)) {
		    // the simple case, where AA i is changing to AA j
		    pij= _sp.Pij_t(_s1[pos],_s2[pos],dist*rate);
		    dpij= _sp.dPij_dt(_s1[pos],_s2[pos],dist*rate)*rate;
		    MDOUBLE tmp =  _sp.freq(_s1[pos]);
		    posLikelihood += pij *tmp;
		    posLikelihood_d += dpij*tmp;
		} else {// this is the most complicated case, when you have
		    // combinations of letters, for example B in one
		    // sequence and ? in the other.
		    for (int iS1 =0; iS1< _sp.alphabetSize(); ++iS1) {
			for (int iS2 =0; iS2< _sp.alphabetSize(); ++iS2) {
			    if ((_s1.getAlphabet()->relations(_s1[pos],iS1)) &&
				(_s2.getAlphabet()->relations(_s2[pos],iS2))) {
				MDOUBLE exp = _sp.freq(iS1);
				posLikelihood += exp* _sp.Pij_t(iS1,iS2,dist*rate);
				posLikelihood_d += exp * _sp.dPij_dt(iS1,iS2,dist*rate)*rate;
			    }
			}
		    }
		}
	    }
	    assert(posLikelihood>0.0);
	    sumL += (posLikelihood_d/posLikelihood)*(_weights ? (*_weights)[pos]:1.0);
	}
	return -sumL;
    };
};

const MDOUBLE givenRatesMLDistance::giveDistance(const sequence& s1,
						 const sequence& s2,
						 const vector<MDOUBLE>  * weights,
						 MDOUBLE* score) const
{
    const MDOUBLE ax=0,bx=1.0,cx=_maxPairwiseDistance;
    MDOUBLE dist=-1.0;
    MDOUBLE resL = -dbrent(ax,bx,cx,
			   C_eval_likelihoodOfDistanceGivenRates(_sp,s1,s2,_rates,weights),
			   C_eval_likelihoodOfDistanceGivenRates_d(_sp,s1,s2,_rates,weights),
			   _toll,
			   &dist);
    if (score) *score = resL;
    return dist;
};