File: trees.h

package info (click to toggle)
phast 1.4%2Bdfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: buster
  • size: 12,412 kB
  • sloc: ansic: 54,180; makefile: 354; sh: 337; perl: 321
file content (463 lines) | stat: -rw-r--r-- 19,031 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
/***************************************************************************
 * PHAST: PHylogenetic Analysis with Space/Time models
 * Copyright (c) 2002-2005 University of California, 2006-2010 Cornell 
 * University.  All rights reserved.
 *
 * This source code is distributed under a BSD-style license.  See the
 * file LICENSE.txt for details.
 ***************************************************************************/

/** @file trees.h
    Functions and structures to hold, manipulate, and test trees, branches, and nodes.  
    @ingroup phylo
*/

#ifndef TREES_H
#define TREES_H

/** Maximum length of tree described as a string */
#define MAX_TREESTR_LEN 10000
/** Maximum line length that can be read from file at a time */
#define MAX_LINE_LEN 10000

#include <stdio.h>
#include <lists.h>
#include <stringsplus.h>

typedef struct tree_node TreeNode;

/* TODO: starting to accumulate a lot of data about the entire tree at
   each node; perhaps time to adopt a less purely recursive structure,
   with a higher level struct describing the tree as a whole. */
/** Structure describing a single node of a tree. 

  @warning The lists
   should be accessed only via the functions tr_preorder,
   tr_inorder, tr_postorder.  
  @warning Most of these auxiliary
   attributes (nnodes, height, nodes, preorder, inorder) are not
   guaranteed to remain correct if the structure of a tree is
   altered after initialization
*/
struct tree_node {
  TreeNode *parent;		/**< Node that is a parent to this node */
  TreeNode *lchild, 		/**< Node that is a child of this node, drawn to the left in a diagram */
   *rchild;   			/**< Node that is a child of this node, drawn to the right in a diagram */
  double dparent;		/**< Distance to parent node */
  char name [STR_MED_LEN];	/**< Name of this node i.e. 'Drosophila 23' usually one of the sequence names */
  void *data;                   /**< Allows generic data to be 
                                   associated with tree node */ 
  int id;			/**< Uniquely identifying id number for this node */
  int nnodes;                   /**< Number of nodes in subtree defined
                                   by this node */
  int height;                   /**< Height of this node in the tree
                                   (maximum distance to a leaf, in
                                   terms of number of edges) */
  char *label;                  /**< paml-style node label indicated by # 
                                   sign and a character string.  Used 
				   to indicate which lineage-specific 
				   model to use */
  int hold_constant;            /**< If 1, do not optimize this branch length. 
                                   Indicated by an exclamation point after the
                                   branch length */
  List *nodes;                  /**< List of nodes: ith element is a
                                   pointer to the node with id = i (Only guaranteed to be defined for
     				   the TreeNode at the root of a tree, defined on initialization) */
  List *preorder;               /**< List of nodes in the order of a
                                   preorder traversal from the root (Only guaranteed to be defined for
     				   the TreeNode at the root of a tree, defined on demand)*/
  List *inorder;                /**< List of nodes in the order of an
                                   inorder traversal from the root (Only guaranteed to be defined for
     				   the TreeNode at the root of a tree)*/
  List *postorder;              /**< List of nodes in the order of a
                                   postorder traversal from the
                                   root (Only guaranteed to be defined for
     				   the TreeNode at the root of a tree)*/
};

/** \name Tree allocation functions 
\{ */

/** Parse a tree from a file in Newick (New Hampshire) format 
   @param f File descriptor containing data to make new TreeNode from
   @result Newly allocated tree node with data from file
*/
TreeNode *tr_new_from_file(FILE *f);

/** Parse a Newick-formatted tree from a character string 
   @param s String containing data to make new TreeNode from
   @result Newly allocated tree node with data from string
*/
TreeNode *tr_new_from_string(const char *s);

/** Create and initialize a new tree node
   @result Newly allocated tree node
*/
TreeNode *tr_new_node();

/** \} */

/** Add specified child to specified parent, creating all requisite
   links.  If the parent already has two children, add a new node
   (to simulate an nary tree with a binary tree)
   @param parent Parent of the child node should be added below
   @param child Node to be added as a child below parent
 */
void tr_add_child(TreeNode *parent, TreeNode *child);

/** Returns a newly allocated char * with newick representation of tree.
  @param root Tree Node that is to be represented in a newick format string along with all its descendants
  @param show_branch_lengths Whether to include branch lengths in the newick format
  @result Newick tree string representing all nodes below supplied root node
 */
char *tr_to_string(TreeNode *root, int show_branch_lengths);
void tr_to_string_recur(char *str, TreeNode *node, int show_branch_lengths);

/** \name Tree print functions
\{ */

/** Print tree in New Hampshire / Newick format.
   @param f File descriptor of where to print/save tree in newick format
   @param root Tree node that will be printed to a file, along with all its descendants
   @param show_branch_lengths Whether to show branch lengths in the file
 */
void tr_print(FILE* f, TreeNode *root, int show_branch_lengths);
void tr_print_recur(FILE* f, TreeNode *n, int show_branch_lengths);

/** Print tree in New Hampshire / Newick format (Ordered).
   @param f File descriptor of where to print/save tree in newick format
   @param root Tree node that will be printed to a file, along with all its descendants
   @param show_branch_lengths Whether to show branch lengths in the file
 */
void tr_print_ordered(FILE* f, TreeNode *root, int show_branch_lengths);
void tr_print_ordered_recur(FILE* f, TreeNode *n, int *left_right,
                            int show_branch_lengths);


/**  Print a (very basic!) postscript rendering of a tree.
    @param F destination file
    @param tree Tree root
    @param Whether to print branch lengths by edges
    @param if TRUE, branches will be right-angled, otherwise will be diagonal
    @param use_branch_lens If TRUE, time will be laid out such that edges are proportional to branch lengths (dparent attributes)
    @param horizontal_layout If TRUE, tree will be laid out with root on left and leaves on right; otherwise, root will be at top and leaves on bottom
 */
void tr_print_ps(FILE *F, TreeNode *tree, int show_branch_lens,
                 int square_branches, int use_branch_lens, 
                 int horizontal_layout);

/** Print verbose description of each node
   @param F File descriptor to write to
   @param tree Tree to traverse in preorder and write out verbose description of each node
 */
void tr_print_nodes(FILE *F, TreeNode *tree);


/** \} */

/** Free a tree node and all of its descendants
   @param tree TreeNode to start the freeing at
*/
void tr_free(TreeNode *n);

/** Traverse tree to set nnodes at each node (postorder); also set
   height at each node, and create "nodes" list.
   @param tree Tree Node to start traversal at
 */
void tr_set_nnodes(TreeNode *tree);

/** Set the ID counter to zero */
void tr_reset_id();

/** \name Tree copy functions 
\{  */

/** Copy a tree onto an already existing destination tree node.
   @param src Source root tree node
   @param dest Destination root tree node
*/
void tr_cpy(TreeNode *dest, TreeNode *src);

/** Copy a tree creating newly allocated TreeNode.
   @param src Source root tree node to traverse to copy
   @result Newly allocated Tree Node with copied data
*/
TreeNode *tr_create_copy(TreeNode *src);

/** Copy contents of a single tree node (ignore pointers) 
    @param dest Destination Tree node to copy contents to
    @param src Source Tree node to copy contents from
*/
void tr_node_cpy(TreeNode *dest, TreeNode *src);
/** \} \name Tree order of traversal list functions
\{ */

/** Obtain a list representing a preorder traversal of the tree.
    Allows for simplified loops (no stacks!) and improved
    efficiency.
    @param tr First node in the pre-order list
    @result list representing a preorder traversal of the tree
 */
List *tr_preorder(TreeNode *tr);

/** Obtain a list representing an in-order traversal of the tree.
    Allows for simplified loops (no stacks!) and improved
    efficiency.
    @param tr First node in the pre-order list
    @result List representing an in-order traversal of the tree
 */
List *tr_inorder(TreeNode *tr);

/** Obtain a list representing a postorder traversal of the tree.
    Allows for simplified loops (no stacks!) and improved
    efficiency.
    @param tr First node in the post-order list   
    @result List representing post-order traversal of the tree
 */
List *tr_postorder(TreeNode *tr);

/** \} */

/** Provide x-y coordinates for layout. 
    @param x0 Upper left x bound
    @param y1 Upper left y bound
    @param x1 Lower right x bound
    @param y1 Lower right y band
    @param x On return, will contain x-coordinates for nodes in order of tree->nodes. Must be preallocated
    @param y On return, will contain y-coordinates for nodes in order of tree->nodes. Must be preallocated
    @param use_branch_lens IfTRUE, tree will be laid out such that edges are proportional to branch lengths (dparent attribute)
    @param horizontal If TRUE, tree will be laid out with root on left and leaves on right; otherwise, root will be at top and leaves at bottom
*/
void tr_layout_xy(TreeNode *tree, int x0, int y0, int x1, int y1, 
                  int *x, int *y, int use_branch_lens, int horizontal);

/** \name Tree branch length functions 
\{ */
/** Compute and return sum of lengths at all edges in a single node 
    @param t Tree Node to test lengths of edges with
    @result Total length
*/
double tr_total_len(TreeNode *t);

/** Compute and return sum of lengths of edges in subtree below
    given node
     @param sub_root Starting node of subtree for counting edges
     @result Number of edges found in subtree
*/
double tr_total_len_subtree(TreeNode *sub_root);

/** Compute and return maximum branch length in subtree.
   @param sub_root Starting node of subtree for determining largest branch length
   @result Largest branch length in tree/subtree specified
 */
double tr_max_branchlen(TreeNode *sub_root);

/** Compute and return distance from given node to root of tree.
   @param node Node to start measuring distance from.
   @result Distance from node 'node' to root
*/
double tr_distance_to_root(TreeNode *node);

/** \} */

/** Reurn a node from a tree given its name.
  @param t Tree containing node to return
  @param name Name of node to return
  @result NULL if not found, otherwise tree node specified by name
*/
TreeNode *tr_get_node(TreeNode *t, const char *name);

/** \name Tree scale functions 
\{*/
/** Scale all branch lengths by constant factor.
   @param t Root node of tree to modify
   @param scale_const Amount to scale all branch lengths by
*/
void tr_scale(TreeNode *t, double scale_const);

/** Scale all branch lengths by constant factor in subtree beneath given node 
   @param t Root node of tree containing subtree
   @param sub First node in subtree to modify
   @param scale_const Amount to scale all branch lengths by
   @param include_leading Whether to also scale the leading node
*/
void tr_scale_subtree(TreeNode *t, TreeNode *sub, double scale_const,
		      int include_leading);


/** Scale a tree so that the total branch length of some subtree is as
   defined by a second tree.  
   @param tree  Tree to scale
   @param sub Subtree of 'tree' with leaf
   names a subset of those in the
   first.
   @result Scale factor
   @note This function is similar to tr_extrapolate, but the branch
   length proportions of the larger tree are used without change and
   the smaller tree need not be a proper subtree of the larger
   tree. 
*/
double tr_scale_by_subtree(TreeNode *tree, TreeNode *sub);


/** \} \name Tree prune functions 
\{ */

/**  Prune away all leaves whose names are in (or not in) the specified
    list.  Nodes will be removed and branches combined (branch lengths
    added) to restore as a proper binary tree. 
    @param[in,out] t Tree to prune (may be altered because root can change)
    @param[in,out] names List of names.  On return, will contain list of names of leaves
           that were pruned away.
    @param[in] all_but if FALSE, prune leaves *in* 'names'; if TRUE, prune leaves *not in* 'names'
    @param[out] id_map if not NULL, should be allocated to the number of nodes 
    in original tree. On return, will be filled in with the new id for each 
    node
*/
void tr_prune(TreeNode **t, List *names, int all_but, int *id_map);

/** Prune away all nodes not in the specified subtree.
    @param t Root node of tree to prune
    @param n Root node of subtree to keep
*/
void tr_prune_supertree(TreeNode **t, TreeNode *n);

/** Prune away all nodes in the specified subtree.
    @param t Root node of tree to prune
    @param n Root node of subtree to prune away
 */
void tr_prune_subtree(TreeNode **t, TreeNode *n);

/** \} */

/**  Return the Least Common Ancestor (LCA) of the given species. 
     @pre Ids must be numbered in preorder (a node's parent always has a smaller id than it does and
    left descendants have smaller ids than right descendants)
     @param tree Tree containing species specified in list 'names'
     @param names Names of sequences to compute LCA for
     @result Least common ancestor of nodes specified in 'names'
  */
TreeNode *tr_lca(TreeNode *tree, List *names);

/** Given two trees, one of which is a (proper) subtree of the
    other, create a hybrid tree composed of the smaller tree and a
    scaled version of the larger tree.  
    @param sub Subtree of tree 'super'
    @param super Tree that contains the subtree 'sub'
    @result Hybrid tree composed of the smaller tree and a scaled version of the larger tree
    @note This
    function can be used to extrapolate from a small phylogeny for
    which accurate branch length estimation is possible (e.g., of
    eutherian mammals) to a larger phylogeny for which approximate
    branch length proportions are available, but absolute branch
    length estimates are not (e.g., of more distant vertebrates).
 */
TreeNode *tr_hybrid(TreeNode *sub, TreeNode *super);

/** Partition leaves of tree at (branch above) given node.  All
    descendant leaves of 'sub' will be added to 'inside' list and all
    non-descendants will be added to 'outside' list.  Lists must be
    pre-allocated. 
    @pre Lists 'inside' and 'outside' must be pre-allocated
    @param tree Tree containing subtree
    @param sub Node defining subtree
    @param inside List containing descendant leaves 
    @param outside List containing non-descendant leaves
*/
void tr_partition_leaves(TreeNode *tree, TreeNode *sub, List *inside, 
                         List *outside);

/** Partition leaves of tree at all nodes.  All
    descendant leaves of 'sub' will be added to 'inside' list and all
    non-descendants will be added to 'outside' list.  Lists must be
    pre-allocated. 
    @pre Lists 'inside' and 'outside' must be pre-allocated
    @param tree Tree containing subtree
    @param sub Node defining subtree
    @param inside (Optional) List containing descendant leaves 
    @param outside (Optional) List containing non-descendant leaves
 */
void tr_partition_nodes(TreeNode *tree, TreeNode *sub, List *inside, 
			List *outside);

/** Return a list of the leaf names in a given tree 
    @param tree Tree containing leaf names
    @result List of leaf names
*/
List *tr_leaf_names(TreeNode *tree);

/** Name all un-named ancestral nodes.
   If a node is unnamed, give
   it a name that is a concatenation of the name of a leaf from its
   left subtree and the name of a leaf from its right subtree.
   Leftmost descendants are selected, for lack of any better
   criterion.  
   @param tree Tree containing nodes that might not all have names
*/
void tr_name_ancestors(TreeNode *tree);

/** Re-root tree.

    Subtree originally beneath selected node will become
    right subtree of root, and remainder of tree will be left
    subtree.  
    @warning ids will not be altered, so they will no longer be 
    consistent with a preorder traversal of the tree 
    @warning will not maintain memory of which branches are to be held
    constant
  @param tree Tree to re-root
  @param newroot The new root for specified tree
  @param include_branch If include_branch == FALSE, the selected node will become
    the new root, and a zero-length branch to its right will connect
    it to its original subtree.  If instead include_branch == TRUE,
    then the branch above the selected node will also be included in the
    right subtree.  In this case, the selected node will become the
    right child of the new root and the branch in question will become
    the right branch beneath the new root.  The left branch beneath
    the new root will have length zero and will connect to the former
    parent of the selected node. 
 */
void tr_reroot(TreeNode *tree, TreeNode *newroot, int include_branch);

/** Return an array indicating whether each node is in the designated subtree
    @param t Tree containing subtree 'sub'
    @param sub Subtree of tree 't'
    @result Array of int indicating whether each node is (1) in he designated subtree or not (0). Indexed by id
 */
int* tr_in_subtree(TreeNode *t, TreeNode *sub);

/** \name Tree label functions 
\{ */

/** Set the label for a tree node
    @param t Tree Node to label
    @param label Label to apply to specified tree node
 */
void tr_label(TreeNode *t, const char *label);

/** Set label for a tree node (found by node name).
   @param tree Tree containing node to label
   @param nodename Name of the node to set label for
   @param label New label to apply to tree node
 */
void tr_label_node(TreeNode *t, const char *nodename, const char *label);

/** Set label for an entire subtree.
   @param tree Tree containing subtree to label
   @param subtreeNode Root node of the subtree to label
   @param include_leading_branch Whether to include node subtreeNode in the subtree or treat it as a parent
   @param label Label to apply to set  
*/
void tr_label_subtree(TreeNode *tree, const char *subtreeNode, 
		      int include_leading_branch,
		      const char *label);

/** Create a list of nodes with a given label.
    @param[in] tree Tree containing labeled nodes
    @param[in] label Label that identifies nodes to be added to list
    @param[out] rv List of nodes with given label
 */
void tr_get_labelled_nodes(TreeNode *tree, const char *label, List *rv);
/** \} */

#endif