File: consensus_tree.py

package info (click to toggle)
qiime 1.4.0-2
  • links: PTS, VCS
  • area: main
  • in suites: wheezy
  • size: 29,704 kB
  • sloc: python: 77,837; haskell: 379; sh: 113; makefile: 103
file content (112 lines) | stat: -rwxr-xr-x 3,616 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
#!/usr/bin/env python
# File created on 19 Aug 2010
from __future__ import division

__author__ = "Justin Kuczynski"
__copyright__ = "Copyright 2011, The QIIME Project"
__credits__ = ["Justin Kuczynski"]
__license__ = "GPL"
__version__ = "1.4.0"
__maintainer__ = "Greg Caporaso"
__email__ = "justinak@gmail.com"
__status__ = "Release"

import os
from qiime.util import make_option

import qiime.parse
from qiime.parse import parse_newick
from qiime.util import parse_command_line_parameters, get_options_lookup

from cogent.core.tree import PhyloNode
from cogent.phylo.consensus import majorityRule


options_lookup = get_options_lookup()

script_info = {}
script_info['brief_description'] = "This script outputs a majority consensus tree given a collection of input trees."
script_info['script_description'] = ""
script_info['script_usage'] = [
 ("basic usage: given a directory of trees 'jackknifed_trees', compute the majority consensus and save as a newick formatted text file","",
 "consensus_tree.py -i jackknifed_trees -o consensus_tree.tre")
 
]

script_info['output_description']= "The output is a newick formatted tree compatible with most standard tree viewing programs"
script_info['required_options'] = [\
 make_option('-i','--input_dir',action='store',
          type='string',help='input folder containing trees'),
 make_option('-o','--output_fname',help='the output consensus tree filepath')
]
script_info['optional_options'] = [\
 make_option('-s','--strict', 
  help='Use only nodes occurring >50% of the time [default: %default]',
  default=False, action='store_true')
]
script_info['version'] = __version__



def load_tree_files(tree_dir):
    """Load trees from filepaths
    
    checks if  filenames indicate that trees are from different 
    distance methods.  If so, warns user.
    loads trees into phylonode objects
    returns [trees]
    raises a RuntimeError if no  trees are loaded
    """
    tree_file_names = os.listdir(tree_dir)
    # ignore invisible files like .DS_Store
    tree_file_names = [fname for fname in tree_file_names if not \
        fname.startswith('.')]

    #try to warn user if using multiple types of trees { 
    try:
        base_names = []
        for fname in tree_file_names:
            base_names.append(qiime.parse.parse_rarefaction_fname(fname)[0])
    except ValueError:
        pass
    else:
        if len(set(base_names)) > 1:
            warnstr = """
warning: trees are named differently, please be sure you're not 
comparing trees generated in different manners, unless you're quite sure 
that's what you intend to do.  types: """ + str(set(base_names)) + """
continuing anyway..."""
            warn(warnstr)
    # }
    trees = []
    for fname in tree_file_names:
        try:
            f = open(os.path.join(tree_dir, fname), 'U')
            tree = parse_newick(f, PhyloNode)
            tree.filepath = fname
            trees.append(tree)
            f.close()
        except IOError, err:
            sys.stderr.write('error loading tree ' + fname + '\n')
            exit(1)
    if len(trees) == 0:
        raise RuntimeError('Error: no trees loaded'+\
            ', check that tree directory has has valid trees')
    return trees

def main():
    option_parser, opts, args =\
       parse_command_line_parameters(**script_info)
    

    trees = load_tree_files(opts.input_dir)

    consensus = majorityRule(trees=trees, strict=opts.strict)[0]
    # don't know why returns a list
    
    f = open(opts.output_fname,'w')
    f.write(consensus.getNewick(with_distances=True))
    f.close()

if __name__ == "__main__":
    main()