1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399
|
/*
* phylosupertreeplen.h
*
* Created on: Aug 5, 2013
* Author: olga
*/
#ifndef PHYLOSUPERTREEPLEN_H_
#define PHYLOSUPERTREEPLEN_H_
#include "phylosupertree.h"
#include "model/partitionmodel.h"
#include "superalignmentpairwise.h"
/**
* this is to classify the cases which happen on the subtree
*
* NNI_NONE_EPSILON: all 5 branches have images on subtree, this corresponds to change in subtree topology
* 2 partial_lh vectors for -nni1 or 6 partial_lh vectors for -nni5 options
* NNI_ONE_EPSILON: only one of the 5 branches has no image on subtree, this does not change subtree topology, but changes branch length of subtrees
* we need to allocate partial likelihood memory (1 partial_lh vectors for -nni1 option or 3 partial_lh for -nni5 option)
* NNI_TWO_EPSILON: two branches (on different sides of central branch) have no images, here after the NNI swap,
* the image of central branch either does not change or is equal to epsilon (then we decrease the branch length)
* and no allocation of partial_lh is needed
* NNI_THREE_EPSILON: central and two adjacent edges have no images: after the NNI swap, central branch will have image and we need to relink it
* no allocation of partial_lh is needed
* NNI_MANY_EPSILON: more than 3 branches have no images on subtree: nothing changes in subtree and no recomputation of partial likelihood are required
*/
enum NNIType {NNI_NO_EPSILON, NNI_ONE_EPSILON, NNI_TWO_EPSILON, NNI_THREE_EPSILON, NNI_MANY_EPSILON};
/**
Edge lengths in subtrees are proportional to edge lengths in a supertree.
@author Olga Chernomor <olga.chernomor@univie.ac.at>
*/
class PhyloSuperTreePlen;
// Auxiliary classes ====================================================================================
// ======================================================================================================
class SuperAlignmentPairwisePlen : public SuperAlignmentPairwise {
public:
/**
constructor
*/
SuperAlignmentPairwisePlen();
/**
construct the pairwise alignment from two sequences of a multiple alignment
@param aln input multiple alignment
@param seq_id1 ID of the first sequence
@param seq_id2 ID of the second sequence
*/
SuperAlignmentPairwisePlen(PhyloSuperTreePlen *atree, int seq1, int seq2);
~SuperAlignmentPairwisePlen();
/**
compute the likelihood for a distance between two sequences. Used for the ML optimization of the distance.
@param value x-value of the function
@return log-likelihood
*/
virtual double computeFunction(double value);
/**
This function calculate f(value), first derivative f'(value) and 2nd derivative f''(value).
used by Newton raphson method to minimize the function.
@param value x-value of the function
@param df (OUT) first derivative
@param ddf (OUT) second derivative
@return f(value) of function f you want to minimize
*/
virtual void computeFuncDerv(double value, double &df, double &ddf);
/**
partition information
*/
vector<PartitionInfo>* part_info;
};
// ======================================================================================================
class PartitionModelPlen : public PartitionModel
{
public:
PartitionModelPlen();
/**
constructor
create partition model with possible rate heterogeneity. Create proper class objects
for two variables: model and site_rate. It takes the following field of params into account:
model_name, num_rate_cats, freq_type, store_trans_matrix
@param params program parameters
@param tree associated phylogenetic super-tree
*/
PartitionModelPlen(Params ¶ms, PhyloSuperTreePlen *tree, ModelsBlock *models_block);
~PartitionModelPlen();
/**
save object into the checkpoint
*/
virtual void saveCheckpoint();
/**
restore object from the checkpoint
*/
virtual void restoreCheckpoint();
/**
* @return #parameters of the model + # branches
*/
virtual int getNParameters();
virtual int getNDim();
/**
write information
@param out output stream
*/
virtual void writeInfo(ostream &out);
/**
optimize model parameters and tree branch lengths
NOTE 2016-08-20: refactor the semantic of fixed_len
@param fixed_len 0: optimize branch lengths, 1: fix branch lengths, 2: scale branch lengths
@param write_info TRUE to write model parameters every optimization step, FALSE to only print at the end
@param logl_epsilon log-likelihood epsilon to stop
@param gradient_epsilon gradient (derivative) epsilon to stop
@return the best likelihood
*/
virtual double optimizeParameters(int fixed_len = BRLEN_OPTIMIZE, bool write_info = true,
double logl_epsilon = 0.1, double gradient_epsilon = 0.001);
/**
* optimize model parameters and tree branch lengths for the +I+G model
* using restart strategy.
* @param fixed_len TRUE to fix branch lengths, default is false
* @return the best likelihood
*/
virtual double optimizeParametersGammaInvar(int fixed_len = BRLEN_OPTIMIZE, bool write_info = true, double logl_epsilon = 0.1, double gradient_epsilon = 0.001);
double optimizeGeneRate(double tol);
// virtual double targetFunk(double x[]);
// virtual void getVariables(double *variables);
// virtual void setVariables(double *variables);
/** partition ID currently under optimization of of its rate */
// int optimizing_part;
/**
compute the likelihood for a partition under rate optimization (optimizing_rate).
Used for the ML optimization of gene rate
@param value x-value of the function
@return log-likelihood
*/
// virtual double computeFunction(double value);
};
// ======================================================================================================
// ======================================================================================================
class PhyloSuperTreePlen : public PhyloSuperTree {
public:
/**
constructors
*/
PhyloSuperTreePlen();
PhyloSuperTreePlen(Params ¶ms);
PhyloSuperTreePlen(SuperAlignment *alignment, PhyloSuperTree *super_tree);
~PhyloSuperTreePlen();
/**
save object into the checkpoint
*/
virtual void saveCheckpoint();
/**
restore object from the checkpoint
*/
virtual void restoreCheckpoint();
/**
Read the tree saved with Taxon Names and branch lengths.
@param tree_string tree string to read from
*/
virtual void readTreeString(const string &tree_string);
/**
* Return the tree string containing taxon names and branch lengths
* @return tree string
*/
virtual string getTreeString();
/**
compute the distance between 2 sequences.
@param seq1 index of sequence 1
@param seq2 index of sequence 2
@param initial_dist initial distance
@return distance between seq1 and seq2
*/
virtual double computeDist(int seq1, int seq2, double initial_dist, double &var);
/**
create sub-trees T|Y_1,...,T|Y_k of the current super-tree T
and map F={f_1,...,f_k} the edges of supertree T to edges of subtrees T|Y_i
*/
virtual void mapTrees();
/**
* Given current supertree T and subtrees T|Y_1,...,T|Y_k, build all maps f_1,...,f_k
*/
virtual void linkTrees();
/**
initialize partial_lh vector of all PhyloNeighbors, allocating central_partial_lh
*/
virtual void initializeAllPartialLh();
/**
initialize partial_lh vector of all PhyloNeighbors, allocating central_partial_lh
@param node the current node
@param dad dad of the node, used to direct the search
@param index the index
*/
virtual void initializeAllPartialLh(int &index, int &indexlh, PhyloNode *node = NULL, PhyloNode *dad = NULL);
void initializeAllPartialLh(double* &lh_addr, UBYTE* &scale_addr, UINT* &pars_addr, PhyloNode *node = NULL, PhyloNode *dad = NULL);
/**
de-allocate central_partial_lh
*/
virtual void deleteAllPartialLh();
/**
* @return the type of NNI around node1-node2 for partition part
*/
void getNNIType(PhyloNode *node1, PhyloNode *node2, vector<NNIType> &nni_type);
/**
Inherited from Optimization class.
This function calculate f(value), first derivative f'(value) and 2nd derivative f''(value).
used by Newton raphson method to minimize the function.
@param value current branch length
@param df (OUT) first derivative
@param ddf (OUT) second derivative
@return negative of likelihood (for minimization)
*/
virtual void computeFuncDerv(double value, double &df, double &ddf);
/**
inherited from Optimization class, to return to likelihood of the tree
when the current branch length is set to value
@param value current branch length
@return negative of likelihood (for minimization)
*/
virtual double computeFunction(double value);
/**
compute tree likelihood on a branch. used to optimize branch length
@param dad_branch the branch leading to the subtree
@param dad its dad, used to direct the tranversal
@return tree likelihood
*/
virtual double computeLikelihoodBranch(PhyloNeighbor *dad_branch, PhyloNode *dad);
/**
compute tree likelihood on a branch given buffer (theta_all), used after optimizing branch length
@return tree likelihood
*/
virtual double computeLikelihoodFromBuffer();
/**
optimize all branch lengths of all subtrees, then compute branch lengths
of supertree as weighted average over all subtrees
@param iterations number of iterations to loop through all branches
@return the likelihood of the tree
*/
virtual double optimizeAllBranches(int my_iterations = 100, double tolerance = TOL_LIKELIHOOD, int maxNRStep = 100);
/**
optimize one branch length by ML by optimizing all mapped branches of subtrees
@param node1 1st end node of the branch
@param node2 2nd end node of the branch
@param clearLH true to clear the partial likelihood, otherwise false
@return likelihood score
*/
virtual void optimizeOneBranch(PhyloNode *node1, PhyloNode *node2, bool clearLH = true, int maxNRStep = 100);
/**
search the best swap for a branch
@return NNIMove The best Move/Swap
@param cur_score the current score of the tree before the swaps
@param node1 1 of the 2 nodes on the branch
@param node2 1 of the 2 nodes on the branch
*/
virtual NNIMove getBestNNIForBran(PhyloNode *node1, PhyloNode *node2, NNIMove *nniMoves = NULL);
/**
Do an NNI on the supertree and synchronize all subtrees respectively
@param move the single NNI
*/
virtual void doNNI(NNIMove &move, bool clearLH = true);
/**
apply NNIs from the non-conflicting NNI list
@param compatibleNNIs vector of all compatible NNIs
@param changeBran whether or not the computed branch lengths should be applied
*/
virtual void doNNIs(vector<NNIMove> &compatibleNNIs, bool changeBran = true);
/**
* Apply 5 new branch lengths stored in the NNI move
* @param nnimove the NNI move currently in consideration
*/
virtual void changeNNIBrans(NNIMove nnimove);
/**
This is for ML. try to swap the tree with nearest neigbor interchange at the branch connecting node1-node2.
If a swap shows better score, return the swapped tree and the score.
@param cur_score current likelihood score
@param node1 1st end node of the branch
@param node2 2nd end node of the branch
@param nni_param (OUT) if not NULL: swapping information returned
@return the likelihood of the tree
*/
virtual double swapNNIBranch(double cur_score, PhyloNode *node1, PhyloNode *node2, SwapNNIParam *nni_param = NULL, NNIMove *nniMoves = NULL);
/**
* used in swapNNIBranch to update link_neighbors of other SuperNeighbors that point to the same branch on SubTree as (node,dad)
* @param saved_link_dad_nei pointer to link_neighbor dad_nei
*/
void linkCheck(int part, Node* node, Node* dad, PhyloNeighbor* saved_link_dad_nei);
void linkCheckRe(int part, Node* node, Node* dad, PhyloNeighbor* saved_link_dad_nei,PhyloNeighbor* saved_link_node_nei);
/**
compute the weighted average of branch lengths over partitions
*/
virtual void computeBranchLengths();
bool checkBranchLen();
void mapBranchLen();
void mapBranchLen(int part);
virtual void printMapInfo();
// virtual void restoreAllBrans(PhyloNode *node, PhyloNode *dad);
/**
* initialize partition information for super tree
*/
virtual void initPartitionInfo();
void printNNIcasesNUM();
/**
* indicates whether partition rates are fixed or not
*/
bool fixed_rates;
/*
* 1 - # of is_nni on subtree
* 2 - # of relink branch to an empty one
* 3 - # of empty to empty
* 4 - # of relink branch to a new one (50% saving on these cases compared to the previous implementation)
* 5 - # of relink branch to an old one + relink empty to some branch (100% saving on these cases)
*/
int allNNIcases_computed[5];
/**
Neighbor-joining/parsimony tree might contain negative branch length. This
function will fix this.
@param fixed_length fixed branch length to set to negative branch lengths
@param node the current node
@param dad dad of the node, used to direct the search
@return The number of branches that have no/negative length
*/
virtual int fixNegativeBranch(bool force = false, Node *node = NULL, Node *dad = NULL);
protected:
vector<uint64_t> partial_lh_entries, scale_num_entries, partial_pars_entries, block_size, scale_block_size;
};
#endif /* PHYLOSUPERTREEPLEN_H_ */
|