File: bblEMProportionalEB.cpp

package info (click to toggle)
fastml 3.11-4
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 5,772 kB
  • sloc: cpp: 48,522; perl: 3,588; ansic: 819; makefile: 386; python: 83; sh: 55
file content (225 lines) | stat: -rw-r--r-- 10,360 bytes parent folder | download | duplicates (10)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
// $Id: bblEMProprtional.cpp 962 2006-11-07 15:13:34Z privmane $
#include "bblEM.h"
#include "bblEMProportionalEB.h"
#include "likelihoodComputation.h"
using namespace likelihoodComputation;
#include "computeUpAlg.h"
#include "computeDownAlg.h"
#include "computeCounts.h"
#include "treeIt.h"
#include "fromCountTableComponentToDistance.h"
#include <ctime>//#define VERBOS
#include "fromCountTableComponentToDistancePropEB.h"

bblEMProportionalEB::bblEMProportionalEB(tree& et,
									const vector<sequenceContainer>& sc,
									multipleStochasticProcess* msp,
									const gammaDistribution* pProportionDist,
									const bool optimizeSelectedBranches,
									const vector<Vdouble *> * weights,
									const int maxIterations,
									const MDOUBLE epsilon,
									const MDOUBLE tollForPairwiseDist,
									const MDOUBLE* likelihoodLast):

_et(et),_sc(sc),_msp(msp),_pProportionDist(pProportionDist),_weights (weights),_optimizeSelectedBranches(optimizeSelectedBranches) {
	_numberOfGenes = _sc.size();
	assert(_msp->getSPVecSize() == _sc.size());
	_treeLikelihoodVec = compute_bblEMPropEB(maxIterations,epsilon,tollForPairwiseDist,likelihoodLast);
}

Vdouble bblEMProportionalEB::compute_bblEMPropEB(
			const int maxIterations,
			const MDOUBLE epsilon,
			const MDOUBLE tollForPairwiseDist,
			const MDOUBLE* likelihoodLast){
	LOGnOUT(5,<<"Allocating place"<<endl);
	allocatePlacePropEB();
	LOGnOUT(5,<<"Done Allocating place"<<endl);
	Vdouble oldLvec(_numberOfGenes,VERYSMALL);
	Vdouble currLvec;
	currLvec.resize(_numberOfGenes);
	tree oldT = _et;
	//doubleRep epsilonDR(epsilon);//DR
	for (int i=0; i < maxIterations; ++i) {
		LOGnOUT(5,<<"Calling computeUpPropEB on iteration "<<i<<endl);
		computeUpPropEB();
		for (int geneN=0; geneN < _numberOfGenes; ++geneN) {
			currLvec[geneN] = likelihoodComputation::getTreeLikelihoodFromUp2(_et,_sc[geneN],*_msp->getSp(geneN),_cup[geneN],_pProportionDist,_posLike[geneN],(_weights?(*_weights)[geneN]:NULL));
		}
		LOGnOUT(5,<<"--- Iter="<<i<<" logL="<<sumVdouble(currLvec)<<endl);
		if(sumVdouble(oldLvec)<=sumVdouble(currLvec)){	// make sure not to use tree with lower likelihood then last computed likelihood (before BBL-EM)
			LOGnOUT(4,<<"Likelihood improved. oldL = "<<sumVdouble(oldLvec)<<" newL = "<<sumVdouble(currLvec)<<endl);
			if(likelihoodLast){
				if(*likelihoodLast<=sumVdouble(currLvec))
					oldT = _et;	 // L didn't go down
				else
					LOGnOUT(4,<<"Likelihood went down compared pre-BBL oldL="<<*likelihoodLast<<" newL="<<sumVdouble(currLvec)<<" Do not update tree"<<endl);
			}
			else{
				oldT = _et;	 // L didn't go down
				LOGnOUT(7,<<"Tree Updated"<<endl);
			}
		}
		else 
			LOGnOUT(4,<<"Likelihood did not improve. oldL="<<sumVdouble(oldLvec)<<" newL="<<sumVdouble(currLvec)<<" Do not update tree"<<endl);				

		if (sumVdouble(currLvec) < sumVdouble(oldLvec) + epsilon) { // need to break
			if (sumVdouble(currLvec)<sumVdouble(oldLvec)) {
				_et = oldT; //return to older tree
				LOGnOUT(4,<<"Finished bblEMPropEB. Likelihood ="<<sumVdouble(oldLvec)<<endl);
				return oldLvec; // keep the old tree, and old likelihood
			} else {
                //update the tree and likelihood and return
				LOGnOUT(4,<<"Finished bblEMPropEB. Likelihood ="<<sumVdouble(currLvec)<<endl);
				return currLvec;
			}
		}
		bblEM_itPropEB(tollForPairwiseDist);
		oldLvec = currLvec;
	}
	// in the case were we reached max_iter, we have to recompute the likelihood of the new tree...
	computeUpPropEB();
	for (int geneN=0; geneN < _numberOfGenes; ++geneN) {
		currLvec[geneN] = likelihoodComputation::getTreeLikelihoodFromUp2(_et,_sc[geneN],*_msp->getSp(geneN),_cup[geneN],_pProportionDist,_posLike[geneN],(_weights?(*_weights)[geneN]:NULL));
	}
	if (sumVdouble(currLvec)<sumVdouble(oldLvec)) {
		_et = oldT;
		LOGnOUT(4,<<"Finished bblEMPropEB max iter. Likelihood ="<<sumVdouble(oldLvec)<<endl);
		return oldLvec; // keep the old tree, and old likelihood
	} 
	else{
		LOGnOUT(4,<<"Finished bblEMPropEB max iter. Likelihood ="<<sumVdouble(currLvec)<<endl);
        return currLvec;
	}
}

void bblEMProportionalEB::allocatePlacePropEB() {
	_computeCountsV.resize(_numberOfGenes);
	_cup.resize(_numberOfGenes);
	_cdown.resize(_numberOfGenes);
	_pij.resize(_numberOfGenes);
	_posLike.resize(_numberOfGenes);
	for (int geneN=0; geneN < _numberOfGenes; ++geneN) {
		_posLike[geneN].resize(_sc[geneN].seqLen());
		for(int pos = 0;pos < _sc[geneN].seqLen();++pos){
			_posLike[geneN][pos].resize(_pProportionDist->categories(),0.0);
		}
		stochasticProcess * sp = _msp->getSp(geneN);
		_pij[geneN].resize(_pProportionDist->categories());
		_computeCountsV[geneN].resize(_et.getNodesNum()); //initiateTablesOfCounts
		for (int i=0; i < _computeCountsV[geneN].size(); ++i) {
			_computeCountsV[geneN][i].countTableComponentAllocatePlace(sp->alphabetSize(),_pProportionDist->categories(),sp->categories());
		}
		_cup[geneN].allocatePlace(_sc[geneN].seqLen(),_pProportionDist->categories(),sp->categories(),_et.getNodesNum(), _sc[geneN].alphabetSize());
		_cdown[geneN].allocatePlace(_pProportionDist->categories(),sp->categories(),_et.getNodesNum(), _sc[geneN].alphabetSize());
	}
}

void bblEMProportionalEB::computeUpPropEB(){
	for (int geneN=0; geneN < _numberOfGenes; ++geneN) {
		for(int globalRateCategor = 0;globalRateCategor < _pProportionDist->categories();++globalRateCategor){
			_msp->getSp(geneN)->setGlobalRate(_pProportionDist->rates(globalRateCategor));
			_pij[geneN][globalRateCategor].fillPij(_et,*_msp->getSp(geneN),0); // 0 is becaues we compute Pij(t) and not its derivations...
			computeUpAlg cupAlg;
			for (int pos=0; pos < _sc[geneN].seqLen(); ++pos) {
				for (int localRateCategor = 0; localRateCategor < _msp->getSp(geneN)->categories(); ++localRateCategor) {
					cupAlg.fillComputeUp(_et,_sc[geneN],pos,_pij[geneN][globalRateCategor][localRateCategor],_cup[geneN][pos][globalRateCategor][localRateCategor]);
				}
			}
		}
	}
}

void bblEMProportionalEB::bblEM_itPropEB(const MDOUBLE tollForPairwiseDist){
	for (int geneN=0; geneN < _numberOfGenes; ++geneN) {
		for (int treeNode=0; treeNode < _computeCountsV[geneN].size(); ++treeNode) {
            _computeCountsV[geneN][treeNode].zero();
		}
		for (int pos=0; pos < _sc[geneN].seqLen(); ++pos) {
			computeDownPropEB(geneN,pos);
			addCountsPropEB(geneN,pos); // computes the counts and adds to the table.
		}
	}
	optimizeBranchesPropEB(tollForPairwiseDist);
}

void bblEMProportionalEB::computeDownPropEB(const int gene, const int pos){
	computeDownAlg cdownAlg;
	stochasticProcess * sp = _msp->getSp(gene);
	for (int globalRateCategor = 0; globalRateCategor < _pProportionDist->categories(); ++globalRateCategor) {
		for (int localRateCategor = 0; localRateCategor < sp->categories(); ++localRateCategor) {
			//no need to set the global rate for each sp cause it was already performed in computeUpPropEB
			cdownAlg.fillComputeDown(_et,_sc[gene],pos,_pij[gene][globalRateCategor][localRateCategor],_cdown[gene][globalRateCategor][localRateCategor],_cup[gene][pos][globalRateCategor][localRateCategor]);
		}
	}
}

void bblEMProportionalEB::addCountsPropEB(const int gene, const int pos){
	vector<MDOUBLE> * weightsOfGene = (_weights?(*_weights)[gene]:NULL);					
	MDOUBLE weig = (weightsOfGene ? (*weightsOfGene)[pos] : 1.0);
	if (weig == 0) return;
	treeIterDownTopConst tIt(_et);
	for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()) {
		if (!tIt->isRoot()) {
			addCountsPropEB(gene,pos,mynode,_posLike[gene][pos],weig);
		}
	}
}

void bblEMProportionalEB::addCountsPropEB(const int gene,const int pos, tree::nodeP mynode, const VdoubleRep posProb, const MDOUBLE weig){
	computeCounts cc;
	stochasticProcess * sp = _msp->getSp(gene);
	for (int globalRateCategor =0; globalRateCategor< _pProportionDist->categories(); ++globalRateCategor) {
		for (int localRateCategor =0; localRateCategor < sp->categories(); ++localRateCategor) {
			//cc.computeCountsNodeFatherNodeSonHomPosProportionalEB(_sc[gene],
			//							_pij[gene][globalRateCategor][localRateCategor],
			//							*sp,
			//							_cup[gene][pos][globalRateCategor][localRateCategor],
			//							_cdown[gene][globalRateCategor][localRateCategor],
			//							weig,
			//							posProb,
			//							mynode,
			//							_computeCountsV[gene][mynode->id()][globalRateCategor][localRateCategor],
			//							_pProportionDist->ratesProb(globalRateCategor)*sp->ratesProb(localRateCategor));
			cc.computeCountsNodeFatherNodeSonHomPosProportionalEB(_sc[gene],
										_pij[gene][globalRateCategor][localRateCategor],
										*sp,
										_cup[gene][pos][globalRateCategor][localRateCategor],
										_cdown[gene][globalRateCategor][localRateCategor],
										weig,
										posProb,
										mynode,
										_computeCountsV[gene][mynode->id()][globalRateCategor][localRateCategor]);
		}
	}
}

/*
//tal's old implementation, where i think there's a bug cause he sends _computeCountsV[mynode->id()] to the 
//fromCountTableComponentToDistanceProp constructor, but the first dimension of _computeCountsV is the genes and
//the tree nodes is only the second dimension
void bblEMProportionalEB::optimizeBranchesPropEB(const MDOUBLE tollForPairwiseDist){
	treeIterDownTopConst tIt(_et);
	for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()) {
		if (!tIt->isRoot()) {
			fromCountTableComponentToDistanceProp from1(_computeCountsV[mynode->id()],_sp,tollForPairwiseDist,mynode->dis2father());
			from1.computeDistance();
			mynode->setDisToFather(from1.getDistance());
		}
	}
}
*/

void bblEMProportionalEB::optimizeBranchesPropEB(const MDOUBLE tollForPairwiseDist){
	treeIterDownTopConst tIt(_et);
	for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()) {
		if (!tIt->isRoot()) {
			if((_optimizeSelectedBranches) && (tIt->getComment() != "1")) continue; //only selected branhes will be optimized
			fromCountTableComponentToDistancePropEB from1(_computeCountsV,mynode->id(),_msp,_pProportionDist,tollForPairwiseDist,mynode->dis2father());
			from1.computeDistance();
			mynode->setDisToFather(from1.getDistance());
		}
	}
}