File: phylosupertreeplen.h

package info (click to toggle)
iqtree 1.5.3%2Bdfsg-2
  • links: PTS, VCS
  • area: main
  • in suites: stretch
  • size: 9,780 kB
  • ctags: 11,529
  • sloc: cpp: 96,162; ansic: 59,874; python: 242; sh: 189; makefile: 45
file content (399 lines) | stat: -rw-r--r-- 14,318 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
/*
 * phylosupertreeplen.h
 *
 *  Created on: Aug 5, 2013
 *      Author: olga
 */

#ifndef PHYLOSUPERTREEPLEN_H_
#define PHYLOSUPERTREEPLEN_H_

#include "phylosupertree.h"
#include "model/partitionmodel.h"
#include "superalignmentpairwise.h"


/**
 * this is to classify the cases which happen on the subtree
 *
 *  NNI_NONE_EPSILON: all 5 branches have images on subtree, this corresponds to change in subtree topology
 * 					  2 partial_lh vectors for -nni1 or 6 partial_lh vectors for -nni5 options
 *  NNI_ONE_EPSILON:  only one of the 5 branches has no image on subtree, this does not change subtree topology, but changes branch length of subtrees
 * 					  we need to allocate partial likelihood memory (1 partial_lh vectors for -nni1 option or 3 partial_lh for -nni5 option)
 * 	NNI_TWO_EPSILON:  two branches (on different sides of central branch) have no images, here after the NNI swap,
 * 					  the image of central branch either does not change or is equal to epsilon (then we decrease the branch length)
 * 					  and no allocation of partial_lh is needed
 * 	NNI_THREE_EPSILON: central and two adjacent edges have no images: after the NNI swap, central branch will have image and we need to relink it
 * 					no allocation of partial_lh is needed
 *  NNI_MANY_EPSILON: more than 3 branches have no images on subtree: nothing changes in subtree and no recomputation of partial likelihood are required
 */
enum NNIType {NNI_NO_EPSILON, NNI_ONE_EPSILON, NNI_TWO_EPSILON, NNI_THREE_EPSILON, NNI_MANY_EPSILON};


/**
Edge lengths in subtrees are proportional to edge lengths in a supertree.

	@author Olga Chernomor <olga.chernomor@univie.ac.at>
*/

class PhyloSuperTreePlen;

// Auxiliary classes ====================================================================================
// ======================================================================================================
class SuperAlignmentPairwisePlen : public SuperAlignmentPairwise {

	public:

		/**
		    constructor
		 */

	    SuperAlignmentPairwisePlen();

		/**
			construct the pairwise alignment from two sequences of a multiple alignment
			@param aln input multiple alignment
			@param seq_id1 ID of the first sequence
			@param seq_id2 ID of the second sequence
		*/
		SuperAlignmentPairwisePlen(PhyloSuperTreePlen *atree, int seq1, int seq2);

	    ~SuperAlignmentPairwisePlen();

		/**
			compute the likelihood for a distance between two sequences. Used for the ML optimization of the distance.
			@param value x-value of the function
			@return log-likelihood
		*/
		virtual double computeFunction(double value);

		/**
			This function calculate f(value), first derivative f'(value) and 2nd derivative f''(value).
			used by Newton raphson method to minimize the function.
			@param value x-value of the function
			@param df (OUT) first derivative
			@param ddf (OUT) second derivative
			@return f(value) of function f you want to minimize
		*/
		virtual void computeFuncDerv(double value, double &df, double &ddf);

		/**
			partition information
		*/
		vector<PartitionInfo>* part_info;

};
// ======================================================================================================
class PartitionModelPlen : public PartitionModel
{
public:
    PartitionModelPlen();
	/**
		constructor
		create partition model with possible rate heterogeneity. Create proper class objects
		for two variables: model and site_rate. It takes the following field of params into account:
			model_name, num_rate_cats, freq_type, store_trans_matrix
		@param params program parameters
		@param tree associated phylogenetic super-tree
	*/
	PartitionModelPlen(Params &params, PhyloSuperTreePlen *tree, ModelsBlock *models_block);

    ~PartitionModelPlen();

    /**
        save object into the checkpoint
    */
    virtual void saveCheckpoint();

    /**
        restore object from the checkpoint
    */
    virtual void restoreCheckpoint();

    /**
     * @return #parameters of the model + # branches
     */
    virtual int getNParameters();
    virtual int getNDim();

	/**
		write information
		@param out output stream
	*/
	virtual void writeInfo(ostream &out);

	/**
		optimize model parameters and tree branch lengths
        NOTE 2016-08-20: refactor the semantic of fixed_len
		@param fixed_len 0: optimize branch lengths, 1: fix branch lengths, 2: scale branch lengths
        @param write_info TRUE to write model parameters every optimization step, FALSE to only print at the end
        @param logl_epsilon log-likelihood epsilon to stop
        @param gradient_epsilon gradient (derivative) epsilon to stop
		@return the best likelihood 
	*/
	virtual double optimizeParameters(int fixed_len = BRLEN_OPTIMIZE, bool write_info = true,
                                      double logl_epsilon = 0.1, double gradient_epsilon = 0.001);


	/**
	 *  optimize model parameters and tree branch lengths for the +I+G model
	 *  using restart strategy.
	 * 	@param fixed_len TRUE to fix branch lengths, default is false
	 *	@return the best likelihood
	 */
	virtual double optimizeParametersGammaInvar(int fixed_len = BRLEN_OPTIMIZE, bool write_info = true, double logl_epsilon = 0.1, double gradient_epsilon = 0.001);

	double optimizeGeneRate(double tol);

//	virtual double targetFunk(double x[]);
//	virtual void getVariables(double *variables);
//	virtual void setVariables(double *variables);

    /** partition ID currently under optimization of of its rate */
//    int optimizing_part;

    /**
        compute the likelihood for a partition under rate optimization (optimizing_rate).
        Used for the ML optimization of gene rate
        @param value x-value of the function
        @return log-likelihood
    */
//    virtual double computeFunction(double value);


};

// ======================================================================================================
// ======================================================================================================

class PhyloSuperTreePlen : public PhyloSuperTree {

public:
	/**
		constructors
	*/
	PhyloSuperTreePlen();
	PhyloSuperTreePlen(Params &params);
	PhyloSuperTreePlen(SuperAlignment *alignment, PhyloSuperTree *super_tree);

	~PhyloSuperTreePlen();

    /**
        save object into the checkpoint
    */
    virtual void saveCheckpoint();

    /**
        restore object from the checkpoint
    */
    virtual void restoreCheckpoint();

    /**
            Read the tree saved with Taxon Names and branch lengths.
            @param tree_string tree string to read from
     */
    virtual void readTreeString(const string &tree_string);

    /**
     * Return the tree string containing taxon names and branch lengths
     * @return tree string
     */
    virtual string getTreeString();


    /**
            compute the distance between 2 sequences.
            @param seq1 index of sequence 1
            @param seq2 index of sequence 2
            @param initial_dist initial distance
            @return distance between seq1 and seq2
     */

    virtual double computeDist(int seq1, int seq2, double initial_dist, double &var);

	/**
		create sub-trees T|Y_1,...,T|Y_k of the current super-tree T
		and map F={f_1,...,f_k} the edges of supertree T to edges of subtrees T|Y_i
	*/
	virtual void mapTrees();

	/**
	 * Given current supertree T and subtrees T|Y_1,...,T|Y_k, build all maps f_1,...,f_k
	 */
	virtual void linkTrees();

    /**
            initialize partial_lh vector of all PhyloNeighbors, allocating central_partial_lh
     */
    virtual void initializeAllPartialLh();

    /**
            initialize partial_lh vector of all PhyloNeighbors, allocating central_partial_lh
            @param node the current node
            @param dad dad of the node, used to direct the search
            @param index the index
     */
    virtual void initializeAllPartialLh(int &index, int &indexlh, PhyloNode *node = NULL, PhyloNode *dad = NULL);

    void initializeAllPartialLh(double* &lh_addr, UBYTE* &scale_addr, UINT* &pars_addr, PhyloNode *node = NULL, PhyloNode *dad = NULL);

    /**
            de-allocate central_partial_lh
     */
    virtual void deleteAllPartialLh();

	/**
	 * @return the type of NNI around node1-node2 for partition part
	 */
	void getNNIType(PhyloNode *node1, PhyloNode *node2, vector<NNIType> &nni_type);

    /**
            Inherited from Optimization class.
            This function calculate f(value), first derivative f'(value) and 2nd derivative f''(value).
            used by Newton raphson method to minimize the function.
            @param value current branch length
            @param df (OUT) first derivative
            @param ddf (OUT) second derivative
            @return negative of likelihood (for minimization)
     */
    virtual void computeFuncDerv(double value, double &df, double &ddf);

    /**
            inherited from Optimization class, to return to likelihood of the tree
            when the current branch length is set to value
            @param value current branch length
            @return negative of likelihood (for minimization)
     */
    virtual double computeFunction(double value);

    /**
            compute tree likelihood on a branch. used to optimize branch length
            @param dad_branch the branch leading to the subtree
            @param dad its dad, used to direct the tranversal
            @return tree likelihood
     */
    virtual double computeLikelihoodBranch(PhyloNeighbor *dad_branch, PhyloNode *dad);

    /**
            compute tree likelihood on a branch given buffer (theta_all), used after optimizing branch length
            @return tree likelihood
     */

    virtual double computeLikelihoodFromBuffer();

    /**
            optimize all branch lengths of all subtrees, then compute branch lengths
            of supertree as weighted average over all subtrees
            @param iterations number of iterations to loop through all branches
            @return the likelihood of the tree
     */
    virtual double optimizeAllBranches(int my_iterations = 100, double tolerance = TOL_LIKELIHOOD, int maxNRStep = 100);

    /**
            optimize one branch length by ML by optimizing all mapped branches of subtrees
            @param node1 1st end node of the branch
            @param node2 2nd end node of the branch
            @param clearLH true to clear the partial likelihood, otherwise false
            @return likelihood score
     */
    virtual void optimizeOneBranch(PhyloNode *node1, PhyloNode *node2, bool clearLH = true, int maxNRStep = 100);

    /**
            search the best swap for a branch
            @return NNIMove The best Move/Swap
            @param cur_score the current score of the tree before the swaps
            @param node1 1 of the 2 nodes on the branch
            @param node2 1 of the 2 nodes on the branch
     */
    virtual NNIMove getBestNNIForBran(PhyloNode *node1, PhyloNode *node2, NNIMove *nniMoves = NULL);


    /**
            Do an NNI on the supertree and synchronize all subtrees respectively
            @param move the single NNI
     */
    virtual void doNNI(NNIMove &move, bool clearLH = true);

	/**
            apply  NNIs from the non-conflicting NNI list
            @param compatibleNNIs vector of all compatible NNIs
            @param changeBran whether or not the computed branch lengths should be applied
     */
    virtual void doNNIs(vector<NNIMove> &compatibleNNIs, bool changeBran = true);

    /**
     *   Apply 5 new branch lengths stored in the NNI move
     *   @param nnimove the NNI move currently in consideration
     */
    virtual void changeNNIBrans(NNIMove nnimove);

    /**
            This is for ML. try to swap the tree with nearest neigbor interchange at the branch connecting node1-node2.
            If a swap shows better score, return the swapped tree and the score.
            @param cur_score current likelihood score
            @param node1 1st end node of the branch
            @param node2 2nd end node of the branch
            @param nni_param (OUT) if not NULL: swapping information returned
            @return the likelihood of the tree
     */
    virtual double swapNNIBranch(double cur_score, PhyloNode *node1, PhyloNode *node2, SwapNNIParam *nni_param = NULL, NNIMove *nniMoves = NULL);

    /**
     *	used in swapNNIBranch to update link_neighbors of other SuperNeighbors that point to the same branch on SubTree as (node,dad)
     *	@param saved_link_dad_nei   pointer to link_neighbor dad_nei
     */
    void linkCheck(int part, Node* node, Node* dad, PhyloNeighbor* saved_link_dad_nei);
    void linkCheckRe(int part, Node* node, Node* dad, PhyloNeighbor* saved_link_dad_nei,PhyloNeighbor* saved_link_node_nei);

	/**
		compute the weighted average of branch lengths over partitions
	*/
	virtual void computeBranchLengths();

	bool checkBranchLen();
	void mapBranchLen();
	void mapBranchLen(int part);
	virtual void printMapInfo();

//	virtual void restoreAllBrans(PhyloNode *node, PhyloNode *dad);

	/**
	 * initialize partition information for super tree
	*/
	virtual void initPartitionInfo();

	void printNNIcasesNUM();

    /**
     * 		indicates whether partition rates are fixed or not
     */

    bool fixed_rates;

    /*
     * 1 - # of is_nni on subtree
     * 2 - # of relink branch to an empty one
     * 3 - # of empty to empty
     * 4 - # of relink branch to a  new one (50% saving on these cases compared to the previous implementation)
     * 5 - # of relink branch to an old one + relink empty to some branch (100% saving on these cases)
     */
    int allNNIcases_computed[5];

    /**
            Neighbor-joining/parsimony tree might contain negative branch length. This
            function will fix this.
            @param fixed_length fixed branch length to set to negative branch lengths
            @param node the current node
            @param dad dad of the node, used to direct the search
            @return The number of branches that have no/negative length
     */
    virtual int fixNegativeBranch(bool force = false, Node *node = NULL, Node *dad = NULL);

protected:
	vector<uint64_t> partial_lh_entries, scale_num_entries, partial_pars_entries, block_size, scale_block_size;

};



#endif /* PHYLOSUPERTREEPLEN_H_ */