File: plot_tree_graphlan.py

package info (click to toggle)
metaphlan2 2.6.0%2Bds-2
  • links: PTS, VCS
  • area: main
  • in suites: stretch
  • size: 72,684 kB
  • ctags: 351
  • sloc: python: 4,352; sh: 26; makefile: 7
file content (177 lines) | stat: -rwxr-xr-x 6,180 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
#!/usr/bin/env python
#Author: Duy Tin Truong (duytin.truong@unitn.it)
#        at CIBIO, University of Trento, Italy

__author__  = 'Duy Tin Truong (duytin.truong@unitn.it)'
__version__ = '0.1'
__date__    = '4 May 2015'

import sys
import os
import argparse as ap
import dendropy
from StringIO import StringIO
import re
from collections import defaultdict
import ConfigParser
import matplotlib.colors as colors
import subprocess


def read_params(argv=None):
    """Parse the command-line options of the script.

    Args:
        argv: Optional list of argument strings to parse. When None (the
            default) argparse reads the arguments from ``sys.argv[1:]``.

    Returns:
        The ``argparse.Namespace`` holding one attribute per option.
    """
    p = ap.ArgumentParser()
    p.add_argument('--ifn_tree',
                   required=True,
                   type=str,
                   help='The input tree in newick format.')
    p.add_argument('--colorized_metadata',
                   default='unset',
                   type=str,
                   help='The metadata field to colorize. Default "unset".')
    p.add_argument('--fig_size',
                   default=8,
                   type=float,
                   help='The figure size. Default "8".')

    # Legend appearance.
    p.add_argument('--legend_marker_size',
                   default=20,
                   type=int,
                   help='The legend marker size. Default "20".')
    p.add_argument('--legend_font_size',
                   default=10,
                   type=int,
                   help='The legend font size. Default "10".')
    p.add_argument('--legend_marker_edge_width',
                   default=0.2,
                   type=float,
                   help='The legend marker edge width. Default "0.2".')

    # Leaf appearance.
    p.add_argument('--leaf_marker_size',
                   default=20,
                   type=int,
                   help='The leaf marker size. Default "20".')
    p.add_argument('--leaf_marker_edge_width',
                   default=0.2,
                   type=float,
                   help='The leaf marker edge width. Default "0.2".')

    # Output.
    p.add_argument('--dpi',
                   default=300,
                   type=int,
                   help='The figure dpi. Default "300".')
    p.add_argument('--figure_extension',
                   default='.png',
                   type=str,
                   help='The figure extension. Default ".png".')
    p.add_argument('--ofn_prefix',
                   default=None,
                   type=str,
                   help='The prefix of output files. '
                        'Default: the input tree path.')
    return p.parse_args(argv)




def run(cmd):
    """Echo a command line and execute it without going through a shell.

    Args:
        cmd: Either a command string, which is split on whitespace (so an
            argument containing spaces cannot be expressed this way), or an
            already-split list/tuple of arguments, which is used as given.

    Returns:
        The exit status returned by ``subprocess.call``.
    """
    if isinstance(cmd, (list, tuple)):
        argv = [str(arg) for arg in cmd]
        shown = ' '.join(argv)
    else:
        argv = cmd.split()
        shown = cmd
    print(shown)
    # Flush so the echoed command precedes the child's own output when
    # stdout is block-buffered (e.g. redirected to a file or a pipe).
    sys.stdout.flush()
    return subprocess.call(argv)
    



def main(args):
    """Annotate a newick tree for GraPhlAn and render it to a figure.

    The tree in ``args.ifn_tree`` is loaded and re-rooted at its midpoint.
    Leaf labels have '.' replaced by ',' and every internal node is given a
    ``node_<n>`` label. For each leaf whose label contains
    ``<colorized_metadata>-<value>``, the value is recorded and mapped to a
    matplotlib named color.

    Files written (``<prefix>`` is ``args.ofn_prefix``, or the input tree
    path when no prefix is given):
        <prefix>.graphlantree       the relabelled tree (newick)
        <prefix>.annot              the GraPhlAn annotation file
        <prefix>.xml                output of graphlan_annotate.py
        <prefix><figure_extension>  output of graphlan.py

    Args:
        args: Namespace with the attributes defined in read_params(). When
            ``args.ofn_prefix`` is empty it is set to ``args.ifn_tree``.

    Note:
        Both external commands are formatted into a single string and handed
        to run(), which splits string commands on whitespace, so file names
        containing whitespace are not supported.
    """
    tree = dendropy.Tree.get_from_path(args.ifn_tree, schema='newick',
                                       preserve_underscores=True)
    tree.reroot_at_midpoint()

    # Build the matcher once. The field name is escaped so that regex
    # metacharacters in it are matched literally.
    md_prefix = args.colorized_metadata + '-'
    md_pattern = re.compile(re.escape(md_prefix) + '[a-zA-Z0-9.]*')

    internal_count = 0
    metadatas = set()
    node2metadata = {}
    for node in tree.preorder_node_iter():
        nodestr = node.get_node_str().strip("'")
        if node.is_leaf():
            # NOTE(review): presumably '.' is replaced because GraPhlAn
            # treats it as a clade-level separator -- confirm. The metadata
            # value is matched on the label *after* this replacement, so a
            # value that contained '.' is cut at the first ','.
            if '.' in nodestr:
                nodestr = nodestr.replace('.', ',')
                node.taxon = dendropy.Taxon(label=nodestr)
            substrs = md_pattern.findall(nodestr)
            if substrs:
                # Every match starts with md_prefix; keep only the value.
                md = substrs[0][len(md_prefix):]
                metadatas.add(md)
                node2metadata[nodestr] = md
        else:
            internal_count += 1
            node.taxon = dendropy.Taxon(label='node_%d' % internal_count)

    metadatas = sorted(metadatas)
    # list() so that indexing also works where keys() returns a view.
    color_names = list(colors.cnames.keys())
    # Colors are reused cyclically if there are more values than names.
    metadata2color = {}
    for i, md in enumerate(metadatas):
        metadata2color[md] = color_names[i % len(color_names)]

    if not args.ofn_prefix:
        args.ofn_prefix = args.ifn_tree
    ofn_tree = args.ofn_prefix + '.graphlantree'
    tree.write_to_path(ofn_tree, 'newick')

    ofn_annot = args.ofn_prefix + '.annot'
    with open(ofn_annot, 'w') as ofile:
        # global options
        ofile.write('branch_bracket_width\t0\n')
        ofile.write('branch_bracket_depth\t0\n')
        ofile.write('annotation_background_width\t0\n')

        # legend: one entry per metadata value, in sorted order so that the
        # annotation file is reproducible between runs
        ofile.write('#legends\n')
        ofile.write('class_legend_font_size\t%d\n' % args.legend_font_size)
        for md in metadatas:
            ofile.write('%s\tclade_marker_size\t%d\n'
                        % (md, args.legend_marker_size))
            ofile.write('%s\tclade_marker_color\t%s\n'
                        % (md, metadata2color[md]))
            ofile.write('%s\tclade_marker_edge_width\t%f\n'
                        % (md, args.legend_marker_edge_width))

        # hide intermediate nodes by giving them a marker size of 0
        for node in tree.preorder_node_iter():
            if not node.is_leaf():
                nodestr = node.get_node_str().strip("'")
                ofile.write('%s\tclade_marker_size\t0\n' % nodestr)

        # colorize the leaf nodes that carry the metadata field
        for node in tree.seed_node.leaf_nodes():
            nodestr = node.get_node_str().strip("'")
            if nodestr in node2metadata:
                leaf_color = metadata2color[node2metadata[nodestr]]
                ofile.write('%s\tclade_marker_size\t%d\n'
                            % (nodestr, args.leaf_marker_size))
                ofile.write('%s\tclade_marker_color\t%s\n'
                            % (nodestr, leaf_color))
                ofile.write('%s\tclade_marker_edge_width\t%f\n'
                            % (nodestr, args.leaf_marker_edge_width))

    ofn_xml = args.ofn_prefix + '.xml'
    cmd = 'graphlan_annotate.py --annot %s %s %s' % (ofn_annot, ofn_tree,
                                                     ofn_xml)
    run(cmd)

    ofn_fig = args.ofn_prefix + args.figure_extension
    cmd = 'graphlan.py %s %s --dpi %d --size %f' % (ofn_xml, ofn_fig,
                                                    args.dpi, args.fig_size)
    run(cmd)

    print('Output file: %s' % ofn_fig)




if __name__ == "__main__":
    # Script entry point: parse the command line and hand the resulting
    # options straight to main().
    main(read_params())