File: fastStartTree.cpp

package info (click to toggle)
fastml 3.11-4
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 5,772 kB
  • sloc: cpp: 48,522; perl: 3,588; ansic: 819; makefile: 386; python: 83; sh: 55
file content (145 lines) | stat: -rw-r--r-- 4,081 bytes parent folder | download | duplicates (10)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
// $Id: fastStartTree.cpp 962 2006-11-07 15:13:34Z privmane $

#include "definitions.h"
#include "tree.h"
#include "treeUtil.h"
#include "fastStartTree.h"
#include "bblEM.h"
#include "likeDist.h"
#include "likelihoodComputation.h"
#include "getRandomWeights.h"
#include "distanceTable.h"
#include "nj.h"
#include "logFile.h"

#include <algorithm>

using namespace std;
using namespace likelihoodComputation;


vector<tree> eliminateHalf(vector<tree>& tVec,
						   sequenceContainer& orginal,
						   stochasticProcess& sp,
						   ostream& out,
						   const int maxIterEM){
	vector<MDOUBLE> likeScore(tVec.size(),0.0);
	int i;
	for (i=0; i < tVec.size(); ++i) {
		bblEM bblEM1(tVec[i],orginal,sp,NULL,maxIterEM,0.01);
		likeScore[i] = bblEM1.getTreeLikelihood();
											
		LOG(5,<<"~");
	}

	vector<MDOUBLE> sortedL = likeScore;
	sort(sortedL.begin(),sortedL.end());
	MDOUBLE median = sortedL[sortedL.size()/2];

	// printing the top ten with their scores;
//	int toPrint = sortedL.size()>10? 10 : sortedL.size();
//	MDOUBLE treshToPrint = sortedL[sortedL.size()-toPrint];
//	out<<"current best 10 (or less) trees: "<<endl;
//	for (int h=0; h < likeScore.size(); ++h) {
//		if (likeScore[h]>treshToPrint) {
//			out<<"likelihood of tree: "<<h<<" = "<<likeScore[h]<<endl;
//			tVec[h].output(out);
//		}
//	}

	for (int p=0; p < sortedL.size(); ++p ){
		out<<"L["<<p<<"]= "<<sortedL[p]<<endl;
	}
	out<<endl;

	vector<tree> newTreeVec;
	for (i=0;i < tVec.size(); ++i) {
		if (likeScore[i]>=median) newTreeVec.push_back(tVec[i]); // ok this is a heck to mark trees
	}
	if (newTreeVec.size() == 0 ) newTreeVec.push_back(tVec[0]); // in case for example that all have the same L
	return newTreeVec;
}



		





		
//------------------ get N starting different NJ trees --------------------

tree getBestMLTreeFromManyNJtrees(sequenceContainer & allTogether,
								stochasticProcess& sp,
								const int numOfNJtrees,
								const MDOUBLE tmpForStartingTreeSearch,
								const MDOUBLE epslionWeights,
								ostream& out) {


	likeDist pd1(sp,0.01);
	vector<tree> tVec;
	int treeTries = 0;
	while (tVec.size() < numOfNJtrees) {
		++treeTries;
		if (treeTries == 5000) break;

		Vdouble startingTreeWeights(allTogether.seqLen(),1.0);
		if (treeTries>1) {// the first is the regular NJ tree
 			getRandomWeights::randomWeightsGamma(startingTreeWeights,
									tmpForStartingTreeSearch);
		}
		for (int p=0; p < startingTreeWeights.size(); ++p){
			if (startingTreeWeights[p]<epslionWeights) startingTreeWeights[p]=0.0;
		}
		#ifdef VERBOS
		if (treeTries ==2){ LOG(5,<<" weights for the 25 positions"<<endl);
			for (int h=0; h < 25; ++h) LOG(5,<<startingTreeWeights[h]<<" ");
		}
		#endif
		VVdouble disTab;
		vector<string> vNames;
		giveDistanceTable(&pd1,
						   allTogether,
						   disTab,
						   vNames,
						   &startingTreeWeights);
		NJalg nj1;
		tree et = nj1.computeTree(disTab,vNames);

		bool treeAlreadyThere = false;
		for (int z=0; z< tVec.size();++z) {
			if (sameTreeTolopogy(tVec[z],et)) treeAlreadyThere=true;
		}
		if (treeAlreadyThere == false) {
			tVec.push_back(et);
		}
	}
	LOG(5,<<"from number of tree tried: "<<treeTries<<" got: "<<numOfNJtrees<<" trees"<<endl);
	out<<"from number of tree tried: "<<treeTries<<" got: "<<numOfNJtrees<<" trees"<<endl;

	int numOfTreesToPrint = tVec.size()<10?tVec.size():10;
	out<<"starting with: "<<tVec.size()<<" trees! "<<endl;
	for (int g=0; g < numOfTreesToPrint; ++g) tVec[g].output(out);

//------------------ chossing the ML tree from these NJ trees --------------------
	int maxIterEM=0;
	while (tVec.size() > 1) {
		LOG(5,<<" current size = "<<tVec.size()<<endl);
		tVec = eliminateHalf(tVec,allTogether,sp,out,maxIterEM);
		maxIterEM=1; // first round without bbl at all.
	}
	LOG(5,<<" final size = "<<tVec.size()<<endl);

	bblEM bblEM1(tVec[0],allTogether,sp,NULL,100,0.01);
	MDOUBLE res = bblEM1.getTreeLikelihood();
											

	LOGDO(5,tVec[0].output(myLog::LogFile()));
	LOG(5,<<"likelihood = "<<res<<endl);
	tVec[0].output(out);
	out<<"likelihood = "<<res<<endl;
	return tVec[0];
}