File: diff_trees.py

package info (click to toggle)
augur 24.4.0-3
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 25,312 kB
  • sloc: python: 14,253; sh: 227; makefile: 35
file content (53 lines) | stat: -rw-r--r-- 1,908 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
import argparse
import Bio.Phylo
import deepdiff

from augur.argparse_ import ExtendOverwriteDefault


def clade_to_items(clade, attrs=("name", "branch_length")):
    """Recursively convert a clade of a tree to a list of nested lists according to
    the topology of the clade with the requested attributes per node.

    The first element of the returned list holds the attribute values of
    ``clade`` itself (in the order given by ``attrs``); each following element
    is the same structure built for one child, in ``clade.clades`` order.

    Parameters
    ----------
    clade
        Node to convert. It must expose a ``clades`` iterable of child nodes
        and every attribute named in ``attrs``.
    attrs
        Names of the node attributes to collect. The same attributes are
        collected for every descendant, not just for ``clade`` itself.

    Returns
    -------
    list
        ``[[values of clade], items of child 1, items of child 2, ...]``

    Raises
    ------
    AttributeError
        If a visited node lacks one of the requested attributes.

    Examples
    --------
    >>> from io import StringIO
    >>> treedata = "(A, (B, C), (D, E))"
    >>> handle = StringIO(treedata)
    >>> tree = Bio.Phylo.read(handle, "newick")
    >>> clade_to_items(tree.root)
    [[None, None], [['A', None]], [[None, None], [['B', None]], [['C', None]]], [[None, None], [['D', None]], [['E', None]]]]
    """
    node_values = [getattr(clade, attr) for attr in attrs]

    # Forward ``attrs`` explicitly: without it the recursion would fall back
    # to the default attributes for every node below the starting clade.
    return [
        node_values,
        *(clade_to_items(child, attrs=attrs) for child in clade.clades),
    ]


if __name__ == "__main__":
    # Command-line entry point: read two Newick trees, flatten each into
    # nested lists of node attributes, and print their DeepDiff.
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    parser.add_argument(
        "first_tree",
        help="first Newick tree to compare",
    )
    parser.add_argument(
        "second_tree",
        help="second Newick tree to compare",
    )
    parser.add_argument(
        "--attributes",
        nargs="+",
        action=ExtendOverwriteDefault,
        default=["name", "branch_length"],
        help="node attributes to include in comparison",
    )
    parser.add_argument(
        "--significant-digits",
        type=int,
        default=5,
        help="number of significant digits to use when comparing branch lengths",
    )
    args = parser.parse_args()

    # Parse both files before converting either, so a parse failure surfaces
    # before any tree is flattened.
    first_tree, second_tree = (
        Bio.Phylo.read(tree_path, "newick")
        for tree_path in (args.first_tree, args.second_tree)
    )

    first_tree_items, second_tree_items = (
        clade_to_items(tree.root, attrs=args.attributes)
        for tree in (first_tree, second_tree)
    )

    tree_diff = deepdiff.DeepDiff(
        first_tree_items,
        second_tree_items,
        significant_digits=args.significant_digits,
    )
    print(tree_diff)