File: suffStatGammaMixture.cpp

package info (click to toggle)
fastml 3.11-4
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 5,772 kB
  • sloc: cpp: 48,522; perl: 3,588; ansic: 819; makefile: 386; python: 83; sh: 55
file content (236 lines) | stat: -rw-r--r-- 7,477 bytes parent folder | download | duplicates (5)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
#include "suffStatGammaMixture.h"
#include "mixtureDistribution.h"
#include "computePijComponent.h"
#include "likelihoodComputation.h"
#include "gammaUtilities.h"
#include "uniDistribution.h"


#include <cmath>
#include <fstream>
using namespace likelihoodComputation;


suffStatGammaMixture::suffStatGammaMixture(const stochasticProcess& cur_sp, const sequenceContainer& sc, const tree& inTree)
{
	_pSp = &cur_sp;
	_pSc = &sc;
	_pTree = &inTree;
}

suffStatGammaMixture::~suffStatGammaMixture()
{
}


void suffStatGammaMixture::allocatePlaceForSuffStat() {
	mixtureDistribution* pMixture = static_cast<mixtureDistribution*>(_pSp->distr());
	int componentNum = pMixture->getComponentsNum();
	_MkVec.clear();
	_MkVec.resize(componentNum, 0);
	_AkVec.clear();
	_AkVec.resize(componentNum, 0);
	_BkVec.clear();
	_BkVec.resize(componentNum, 0);
}

void suffStatGammaMixture::computePijForEachComponent(vector<computePijGam>& cpgVec,
													  vector<stochasticProcess>& spVec) {
	mixtureDistribution* pMixture = static_cast<mixtureDistribution*>(_pSp->distr());
	int componentNum = pMixture->getComponentsNum();
	for (int comp = 0; comp < componentNum; ++comp) {
		//create a local sp so to compute likelihoods of this component only
		stochasticProcess compSp(pMixture->getComponent(comp), _pSp->getPijAccelerator());
		cpgVec[comp].fillPij(*_pTree, compSp);
		spVec.push_back(compSp);
	}
}

void suffStatGammaMixture::computeStatistics()
{
	///////////////as in getTreeLikelihoodAllPosAlphTheSame
	//computePijGam pi;
	//pi.fillPij(*_pTree, *_pSp);
	//MDOUBLE res =0;
	//doubleRep LofPos;
	//int k;
	//for (k=0; k < _pSc->seqLen(); ++k) 
	//{
	//	doubleRep tmp=0;
	//	for (int i=0; i < _pSp->categories();++i) 
	//	{
	//		tmp += getLofPos(k, *_pTree, *_pSc, pi[i], *_pSp) *  _pSp->ratesProb(i);
	//	}
	//	LofPos = tmp;
	//	res += log(LofPos);
	//}
	//////////////////////////////////////////////

	//mixtureDistribution* pMixture = static_cast<mixtureDistribution*>(_pSp->distr());
	//int componentNum = pMixture->getComponentsNum();
	//MDOUBLE res2 = 0.0;
	//vector<computePijGam> cpgVec(componentNum);
	//vector<stochasticProcess> spVec;
	//
	//for (int comp = 0; comp < componentNum; ++comp) {
	//	//create a local sp so to compute likelihoods of this component only
	//	stochasticProcess compSp(pMixture->getComponent(comp), _pSp->getPijAccelerator());
	//	cpgVec[comp].fillPij(*_pTree, compSp);
	//	spVec.push_back(compSp);
	//}
	//
	//for (int pos = 0; pos < _pSc->seqLen(); ++pos)
	//{
	//	int comp;
	//	for (comp = 0; comp < componentNum; ++comp)
	//	{
	//		const generalGammaDistribution* pDist = pMixture->getComponent(comp);
	//		for (int cat=0; cat < pDist->categories(); ++cat) 
	//		{
	//			MDOUBLE tmp = pDist->ratesProb(cat) * getLofPos(pos, *_pTree, *_pSc, cpgVec[comp][cat], *_pSp); 
	//			res2 += log(tmp);
	//		}
	//	}
	//}
	//////////////////////////////////////////////
	allocatePlaceForSuffStat();
	mixtureDistribution* pMixture = static_cast<mixtureDistribution*>(_pSp->distr());
	int componentNum = pMixture->getComponentsNum();

	//compute Pij for each component
	vector<computePijGam> cpgVec(componentNum);
	vector<stochasticProcess> spVec;
	computePijForEachComponent(cpgVec,spVec);


	//compute statistics: M_k, A_k, B_k
	//Here we sum over all positions.
	//go over all positions [pos] and compute for each component [k]: M_k(pos), E[R]_k(pos), E[logR]_k(pos)
	//Then compute A_k and B_k for that position.
	for (int pos = 0; pos < _pSc->seqLen(); ++pos)
	{
		MDOUBLE sumAllComponents = 0.0;
		Vdouble MkPosVec(componentNum, 0.0); //the contribution of position pos to the M_K statistic
		Vdouble Exp_RkVec(componentNum, 0.0);
		Vdouble Exp_LogRkVec(componentNum, 0.0);
		int comp;
		for (comp = 0; comp < componentNum; ++comp)
		{
			// here we compute P(H[i]=k, data| cur_mixtureDistribution)
			//P(H[i]=k, data| teta) = P(H[i]=k)* (sum_over_all_categories{P(data|r)P(r))
			///////////////////////////
			const generalGammaDistribution* pDist = pMixture->getComponent(comp);
			MDOUBLE Exp_Rk, Exp_LogRk, sum;
			Exp_Rk = Exp_LogRk = sum = 0.0;
			for (int cat=0; cat < pDist->categories(); ++cat) 
			{
				MDOUBLE LofP = convert(likelihoodComputation::getLofPos(pos, *_pTree, *_pSc, cpgVec[comp][cat], spVec[comp]));
				MDOUBLE Pr = pDist->ratesProb(cat) * LofP; 
				sum += Pr;
				Exp_RkVec[comp] += Pr * pDist->rates(cat);
				Exp_LogRkVec[comp] += Pr * log(pDist->rates(cat));
			}
			MkPosVec[comp] = sum;
			sumAllComponents += MkPosVec[comp] * pMixture->getComponentProb(comp);;
		}

		for (comp = 0; comp < componentNum; ++comp)
		{
			MDOUBLE factor = pMixture->getComponentProb(comp)/ sumAllComponents;
			_MkVec[comp] += factor* MkPosVec[comp] ;
			_AkVec[comp] += factor * Exp_RkVec[comp];
			_BkVec[comp] += factor * Exp_LogRkVec[comp];
		}
	}// end of loop over positions
	spVec.clear();
	cpgVec.clear();
}


#include "uniformDistribution.h"
void suffStatGammaMixture::plotStatistics(ofstream& outFile)
{
	mixtureDistribution* pMixture = static_cast<mixtureDistribution*>(_pSp->distr());
	if (pMixture->getComponentsNum() != 1)
		errorMsg::reportError("Sorry, I plot only 1 component");

	outFile <<"R"<<"\t"<<"Postr"<<"\t"<<"Er"<<"\t"<<"Elog_r"<<endl;
	const generalGammaDistribution* pDist = pMixture->getComponent(0);
	int numCat = 200, maxR = 10;
	uniformDistribution uniDist(numCat, 0, maxR);
	/////////calc the prior of each interval
	Vdouble priorProbs(uniDist.categories());
	MDOUBLE upperP, lowerP = 0;  
	for (int i = 0; i<uniDist.categories();++i)
	{
		upperP = pDist->getCumulativeProb(uniDist.getBorder(i+1));
		priorProbs[i] = upperP - lowerP;
		lowerP = upperP;
	}

	distribution * pUni =  new uniDistribution;

	stochasticProcess uniSp(pUni, _pSp->getPijAccelerator()); 
	//loop over all r
	for (int ri=0; ri < uniDist.categories(); ++ri) 
	{
		MDOUBLE Exp_R = 0.0;
		MDOUBLE Exp_LogR = 0.0;
		MDOUBLE PosteriorR = 0.0;
		MDOUBLE rate = uniDist.rates(ri);
		if (rate == 0.0)
			rate = 0.000001;
		
		//Here we sum over all positions.
		//go over all positions [pos] and compute: PosrteriorR(=P(D|r)*P(r)), E[R]_k(pos), E[logR]_k(pos)
		for (int pos = 0; pos < _pSc->seqLen(); ++pos)
		{
			MDOUBLE PrPos = priorProbs[ri] * convert(likelihoodComputation::getLofPos(pos, *_pTree, *_pSc, uniSp, rate)); 
			PosteriorR += PrPos;
			Exp_R += PrPos * rate;
			Exp_LogR += PrPos * log(rate);

		}

		outFile <<rate<<"\t"<<PosteriorR<<"\t"<<Exp_R<<"\t"<<Exp_LogR<<endl;
	}

	delete pUni;
}


MDOUBLE suffStatGammaMixture::computeQ2() 
{
	MDOUBLE res=0;

	return res;
}



MDOUBLE suffStatGammaMixture::computeQ() 
{
	mixtureDistribution* pMixture = static_cast<mixtureDistribution*>(_pSp->distr());
	MDOUBLE res = 0.0;
	//////////////////////////////////
	MDOUBLE res2 = 0.0;
	int compNum = pMixture->getComponentsNum();
	///////////////////////////////////
	for (int comp = 0;comp < compNum ; ++comp)
	{
		MDOUBLE P_k = pMixture->getComponentProb(comp);
		MDOUBLE alpha_k = pMixture->getAlpha(comp);
		MDOUBLE beta_k = pMixture->getBeta(comp);
		MDOUBLE first = _MkVec[comp] * log(P_k);
		MDOUBLE second = _MkVec[comp] * alpha_k*log(beta_k);
		MDOUBLE third = -_MkVec[comp] * gammln(alpha_k);
		MDOUBLE fourth = -_AkVec[comp]*beta_k;
		MDOUBLE fifth = _BkVec[comp]*(alpha_k-1.0);
		res += _MkVec[comp] * (log(P_k) + alpha_k*log(beta_k) - gammln(alpha_k)) 
			   - (_AkVec[comp]*beta_k) 
			   + _BkVec[comp]*(alpha_k-1);
		////////////////////////////////////
	}
	res2 = computeQ2();
	return res;
}