File: rategamma.h

package info (click to toggle)
iqtree 1.5.3%2Bdfsg-2
  • links: PTS, VCS
  • area: main
  • in suites: stretch
  • size: 9,780 kB
  • ctags: 11,529
  • sloc: cpp: 96,162; ansic: 59,874; python: 242; sh: 189; makefile: 45
file content (296 lines) | stat: -rw-r--r-- 9,053 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
/***************************************************************************
 *   Copyright (C) 2009 by BUI Quang Minh   *
 *   minh.bui@univie.ac.at   *
 *                                                                         *
 *   This program is free software; you can redistribute it and/or modify  *
 *   it under the terms of the GNU General Public License as published by  *
 *   the Free Software Foundation; either version 2 of the License, or     *
 *   (at your option) any later version.                                   *
 *                                                                         *
 *   This program is distributed in the hope that it will be useful,       *
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
 *   GNU General Public License for more details.                          *
 *                                                                         *
 *   You should have received a copy of the GNU General Public License     *
 *   along with this program; if not, write to the                         *
 *   Free Software Foundation, Inc.,                                       *
 *   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.             *
 ***************************************************************************/
#ifndef RATEGAMMA_H
#define RATEGAMMA_H

#include "rateheterogeneity.h"

const int GAMMA_CUT_MEDIAN = 1; // 2 discrete Gamma approximations (mean or median) of Yang 1994
const int GAMMA_CUT_MEAN   = 2;

class PhyloTree;
/**
Discrete gamma distributed site-rate model from Yang 1994

	@author BUI Quang Minh <minh.bui@univie.ac.at>
*/
class RateGamma: virtual public RateHeterogeneity
{

	friend class RateGammaInvar;

public:
	/**
		constructor
		@param ncat number of rate categories
		@param shape Gamma shape parameter
		@param tree associated phylogenetic tree
	*/
    RateGamma(int ncat, double shape, bool median, PhyloTree *tree);

	/**
		destructor
	*/
    virtual ~RateGamma();

    /**
        save object into the checkpoint
    */
    virtual void saveCheckpoint();

    /**
        restore object from the checkpoint
    */
    virtual void restoreCheckpoint();

	/**
		@return true if this is a Gamma model (default: false)
	*/	
    virtual int isGammaRate() { 
        if (cut_median) return GAMMA_CUT_MEDIAN; 
        return GAMMA_CUT_MEAN;
    }

	virtual double getGammaShape() { return gamma_shape; }

	virtual void setGammaShape(double gs);

	/**
	 * @return model name with parameters in form of e.g. GTR{a,b,c,d,e,f}
	 */
	virtual string getNameParams();

	/**
		@return TRUE to use median rate for discrete categories, FALSE to use mean rate instead
        OBSOLETE, see isGammaRate()
	*/
//	bool isCutMedian() { return cut_median; }

	/**
		@return the number of rate categories
	*/
	virtual int getNRate() { return ncategory; }

	/**
		get the number of rate categories for site-specific category model
		@return the number of rate categories
	*/
	virtual int getNDiscreteRate() { return ncategory; }

	/**
		@param category category ID from 0 to #category-1
		@return the rate of the specified category
	*/
	virtual double getRate(int category) { return rates[category]; }

	/**
		get the proportion of sites under a specified category.
		@param category category ID from 0 to #category-1
		@return the proportion of the specified category
	*/
	virtual double getProp(int category) { return 1.0/ncategory; }

	/**
	 * 	return pointer to the rate array
	 */
	virtual double* getRates() { return rates; }

	/** discrete Gamma according to Yang 1994 (JME 39:306-314) and using median cutting point
		It takes 'ncategory' and 'gamma_shape' variables as input. On output, it write to 'rates' variable.
	*/
	void computeRates();

	/** discrete Gamma according to Yang 1994 (JME 39:306-314) and using mean of the portion of gamma distribution
		It takes 'ncategory' and 'gamma_shape' variables as input. On output, it write to 'rates' variable.
	*/
	void computeRatesMean ();

	/**
		Compute site-specific rates. Override this for Gamma model
		@param pattern_rates (OUT) pattern rates. Resizing if necesary
        @return total number of categories
	*/
	virtual int computePatternRates(DoubleVector &pattern_rates, IntVector &pattern_cat);

	/**
	 * setup the bounds for joint optimization with BFGS
	 */
	virtual void setBounds(double *lower_bound, double *upper_bound, bool *bound_check);

	/**
		the target function which needs to be optimized
		@param x the input vector x
		@return the function value at x
	*/
	virtual double targetFunk(double x[]);

	/**
		optimize parameters. Default is to optimize gamma shape
		@return the best likelihood
	*/
	virtual double optimizeParameters(double gradient_epsilon);

    /**
     *  Same as above but add parameters to control gamma bounds
     */
	virtual double optimizeParameters(double gradient_epsilon, double min_gamma, double max_gamma);

	/**
		override function from Optimization class, used by the minimizeOneDimen() to optimize
		gamma shape parameter
	*/
	virtual double computeFunction(double shape);


	/**
		return the number of dimensions
	*/
	virtual int getNDim() { return !fix_gamma_shape; }

	/**
		write information
		@param out output stream
	*/
	virtual void writeInfo(ostream &out);

	/**
		write parameters, used with modeltest
		@param out output stream
	*/
	virtual void writeParameters(ostream &out);

	virtual bool isFixGammaShape() const {
		return fix_gamma_shape;
	}

	virtual void setFixGammaShape(bool fixGammaShape) {
		fix_gamma_shape = fixGammaShape;
	}

    /**
        set number of rate categories
        @param ncat #categories
    */
	virtual void setNCategory(int ncat);

protected:

	/**
		this function is served for the multi-dimension optimization. It should pack the model parameters
		into a vector that is index from 1 (NOTE: not from 0)
		@param variables (OUT) vector of variables, indexed from 1
	*/
	virtual void setVariables(double *variables);

	/**
		this function is served for the multi-dimension optimization. It should assign the model parameters
		from a vector of variables that is index from 1 (NOTE: not from 0)
		@param variables vector of variables, indexed from 1
		@return TRUE if parameters are changed, FALSE otherwise (2015-10-20)
	*/
	virtual bool getVariables(double *variables);

	/**
		number of rate categories
	*/
	int ncategory;

	/**
		rates, containing ncategory elements
	*/
	double *rates;


	/**
		the gamma shape parameter 'alpha'
	*/
	double gamma_shape;

	/**
		TRUE to fix the gamma shape parameter
	*/
	bool fix_gamma_shape;

	/**
		TRUE to use median rate for discrete categories, FALSE to use mean rate instead
	*/
	bool cut_median;

public:

	//Normally, beta is assigned equal to alpha
	//double cmpPerPointGamma (const double prob, const double shape);

	/***********************************************************
	NUMERICAL SUBROUTINES
	THE FOLLOWING CODE COMES FROM tools.c in Yang's PAML package
	***********************************************************/
	/** returns ln(gamma(alpha)) for alpha>0, accurate to 10 decimal places.
	   Stirling's formula is used for the central polynomial part of the procedure.
	   Pike MC & Hill ID (1966) Algorithm 291: Logarithm of the gamma function.
	   Communications of the Association for Computing Machinery, 9:684

	*/
	static double cmpLnGamma (double alpha);

	/** returns the incomplete gamma ratio I(x,alpha) where x is the upper
	   limit of the integration and alpha is the shape parameter.
	   returns (-1) if in error
	   (1) series expansion     if (alpha>x || x<=1)
	   (2) continued fraction   otherwise
	   RATNEST FORTRAN by
	   Bhattacharjee GP (1970) The incomplete gamma integral.  Applied Statistics,
	   19: 285-287 (AS32)
	*/
	static double cmpIncompleteGamma (double x, double alpha, double ln_gamma_alpha);

	/** functions concerning the CDF and percentage points of the gamma and
	   Chi2 distribution
	   returns z so that Prob{x<z}=prob where x ~ N(0,1) and (1e-12)<prob<1-(1e-12)
	   returns (-9999) if in error
	   Odeh RE & Evans JO (1974) The percentage points of the normal distribution.
	   Applied Statistics 22: 96-97 (AS70)

	   Newer methods:
	     Wichura MJ (1988) Algorithm AS 241: the percentage points of the
	       normal distribution.  37: 477-484.
	     Beasley JD & Springer SG  (1977).  Algorithm AS 111: the percentage
	       points of the normal distribution.  26: 118-121.
	*/
	static double cmpPointNormal (double prob);


	/** returns z so that Prob{x<z}=prob where x is Chi2 distributed with df=v
	   returns -1 if in error.   0.000002<prob<0.999998
	   RATNEST FORTRAN by
	       Best DJ & Roberts DE (1975) The percentage points of the
	       Chi2 distribution.  Applied Statistics 24: 385-388.  (AS91)
	   Converted into C by Ziheng Yang, Oct. 1993.
	*/
	static double cmpPointChi2 (double prob, double v);

	/* THE END OF THE CODES COMMING FROM tools.c in Yang's PAML package */

};




#endif