1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112
|
#!/usr/bin/env python
# File created on 19 Aug 2010
from __future__ import division
__author__ = "Justin Kuczynski"
__copyright__ = "Copyright 2011, The QIIME Project"
__credits__ = ["Justin Kuczynski"]
__license__ = "GPL"
__version__ = "1.4.0"
__maintainer__ = "Greg Caporaso"
__email__ = "justinak@gmail.com"
__status__ = "Release"
import os
from qiime.util import make_option
import qiime.parse
from qiime.parse import parse_newick
from qiime.util import parse_command_line_parameters, get_options_lookup
from cogent.core.tree import PhyloNode
from cogent.phylo.consensus import majorityRule
options_lookup = get_options_lookup()
script_info = {}
script_info['brief_description'] = "This script outputs a majority consensus tree given a collection of input trees."
script_info['script_description'] = ""
script_info['script_usage'] = [
("basic usage: given a directory of trees 'jackknifed_trees', compute the majority consensus and save as a newick formatted text file","",
"consensus_tree.py -i jackknifed_trees -o consensus_tree.tre")
]
script_info['output_description']= "The output is a newick formatted tree compatible with most standard tree viewing programs"
script_info['required_options'] = [\
make_option('-i','--input_dir',action='store',
type='string',help='input folder containing trees'),
make_option('-o','--output_fname',help='the output consensus tree filepath')
]
script_info['optional_options'] = [\
make_option('-s','--strict',
help='Use only nodes occurring >50% of the time [default: %default]',
default=False, action='store_true')
]
script_info['version'] = __version__
def load_tree_files(tree_dir):
"""Load trees from filepaths
checks if filenames indicate that trees are from different
distance methods. If so, warns user.
loads trees into phylonode objects
returns [trees]
raises a RuntimeError if no trees are loaded
"""
tree_file_names = os.listdir(tree_dir)
# ignore invisible files like .DS_Store
tree_file_names = [fname for fname in tree_file_names if not \
fname.startswith('.')]
#try to warn user if using multiple types of trees {
try:
base_names = []
for fname in tree_file_names:
base_names.append(qiime.parse.parse_rarefaction_fname(fname)[0])
except ValueError:
pass
else:
if len(set(base_names)) > 1:
warnstr = """
warning: trees are named differently, please be sure you're not
comparing trees generated in different manners, unless you're quite sure
that's what you intend to do. types: """ + str(set(base_names)) + """
continuing anyway..."""
warn(warnstr)
# }
trees = []
for fname in tree_file_names:
try:
f = open(os.path.join(tree_dir, fname), 'U')
tree = parse_newick(f, PhyloNode)
tree.filepath = fname
trees.append(tree)
f.close()
except IOError, err:
sys.stderr.write('error loading tree ' + fname + '\n')
exit(1)
if len(trees) == 0:
raise RuntimeError('Error: no trees loaded'+\
', check that tree directory has has valid trees')
return trees
def main():
option_parser, opts, args =\
parse_command_line_parameters(**script_info)
trees = load_tree_files(opts.input_dir)
consensus = majorityRule(trees=trees, strict=opts.strict)[0]
# don't know why returns a list
f = open(opts.output_fname,'w')
f.write(consensus.getNewick(with_distances=True))
f.close()
if __name__ == "__main__":
main()
|