1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176
|
#!/usr/bin/env python
# File created on 11 Oct 2011
from __future__ import division
__author__ = "Jesse Stombaugh"
__copyright__ = "Copyright 2011, The QIIME project"
__credits__ = ["Jesse Stombaugh", "Jai Ram Rideout", "Emily TerAvest"]
__license__ = "GPL"
__version__ = "1.8.0"
__maintainer__ = "Jesse Stombaugh"
__email__ = "jesse.stombaugh@colorado.edu"
from qiime.util import parse_command_line_parameters, make_option, \
get_options_lookup,load_qiime_config,create_dir
from cogent.parse.fasta import MinimalFastaParser
from cogent.core.alignment import DenseAlignment
from qiime.parse import parse_qiime_parameters
from cogent.core.moltype import DNA
from qiime.util import get_tmp_filename
from os.path import abspath,join,split,splitext
from qiime.insert_seqs_into_tree import convert_tree_tips, \
write_updated_tree_file, \
strip_and_rename_unwanted_labels_from_tree
import cogent.app.raxml_v730
import cogent.app.parsinsert
import cogent.app.pplacer
from StringIO import StringIO
options_lookup = get_options_lookup()
qiime_config = load_qiime_config()
insertion_method_choices = ['pplacer','raxml_v730','parsinsert']
script_info = {}
script_info['brief_description'] = "Tree Insertion"
script_info['script_description'] = "This script takes a set of aligned sequences (query) either in the same file as the aligned reference set or separated (depending on method) along with a starting tree and produces a new tree containing the query sequences. This script requires that the user is running Raxml v7.3.0, PPlacer git repository version and ParsInsert 1.0.4."
script_info['script_usage'] = []
script_info['script_usage'].append(("""RAxML Example (default):""","""If you just want to use the default options, you can supply an alignment files where the query and reference sequences are included, along with a starting tree as follows:""","""%prog -i aligned_query_seqs.fasta -r aligned_reference_seqs.fasta -t starting_tree.tre -o insertion_results"""))
script_info['script_usage'].append(("""ParsInsert Example:""","""If you want to insert sequences using pplacer, you can supply a fasta file containg query sequences (aligned to reference sequences) along with the reference alignment, a starting tree and the stats file produced when building the starting tree via pplacer as follows:""","""%prog -i aligned_query_seqs.fasta -r aligned_reference_seqs.fasta -t starting_tree.tre -o insertion_results -m parsinsert"""))
script_info['script_usage'].append(("""Pplacer Example:""","""If you want to insert sequences using pplacer, you can supply a fasta file containg query sequences (aligned to reference sequences) along with the reference alignment, a starting tree and the stats file produced when building the starting tree via pplacer as follows:""","""%prog -i aligned_query_seqs.fasta -r aligned_reference_seqs.fasta -t starting_tree.tre -o insertion_results -m pplacer"""))
script_info['script_usage'].append(("""Parameters file:""","""Additionally, users can supply a parameters file to change the options of the underlying tools as follows:""","""%prog -i aligned_query_seqs.fasta -r aligned_reference_seqs.fasta -t starting_tree.tre -o insertion_results -p raxml_parameters.txt"""))
script_info['output_description']= "The result of this script produces a tree file (in Newick format) along with a log file containing the output from the underlying tool used for tree insertion."
script_info['required_options'] = [\
options_lookup['fasta_as_primary_input'],
options_lookup['output_dir'],
make_option('-t','--starting_tree_fp',\
type='existing_filepath',help='Starting Tree which you would like to insert into.'),
make_option('-r','--refseq_fp',\
type='existing_filepath',dest='refseq_fp',help='Filepath for '+\
'reference alignment'),
]
script_info['optional_options'] = [\
make_option('-m','--insertion_method',\
type='choice',help='Method for aligning'+\
' sequences. Valid choices are: ' +\
', '.join(insertion_method_choices) + ' [default: %default]',
choices=insertion_method_choices,\
default='raxml_v730'),
make_option('-s','--stats_fp',\
type='existing_filepath',help='Stats file produced by tree-building software. REQUIRED if -m pplacer [default: %default]'),
make_option('-p','--method_params_fp',\
type='existing_filepath',help="Parameters file containing method-specific parameters to use. Lines should be formatted as 'raxml:-m GTRCAT' (note this is not a standard QIIME parameters file, but a RAxML parameters file). [default: %default]"),
]
script_info['version'] = __version__
def main():
option_parser, opts, args =\
parse_command_line_parameters(**script_info)
parameters={}
# get the tree insertion method to use
module = opts.insertion_method
# create output directory
output_dir=opts.output_dir
create_dir(output_dir)
# list of tree insertion methods
tree_insertion_module_names = \
{'raxml_v730':cogent.app.raxml_v730,
'parsinsert':cogent.app.parsinsert,
'pplacer':cogent.app.pplacer}
# load input sequences and convert to phylip since the tools require
# the query sequences to phylip-compliant names
load_aln = MinimalFastaParser(open(opts.input_fasta_fp,'U'))
aln = DenseAlignment(load_aln)
seqs, align_map = aln.toPhylip()
if opts.method_params_fp:
param_dict = parse_qiime_parameters(open(opts.method_params_fp,'U'))
if module=='raxml_v730':
# load the reference sequences
load_ref_aln = \
DenseAlignment(MinimalFastaParser(open(opts.refseq_fp,'U')))
# combine and load the reference plus query
combined_aln = MinimalFastaParser(StringIO(load_ref_aln.toFasta() + \
'\n' + aln.toFasta()))
# overwrite the alignment map
aln = DenseAlignment(combined_aln)
seqs, align_map = aln.toPhylip()
try:
parameters = param_dict['raxml']
except:
parameters = {}
tree = convert_tree_tips(align_map,opts.starting_tree_fp)
# write out the tree with phylip labels
updated_tree_fp = join(output_dir, \
'%s_phylip_named_tree.tre' % (module))
write_updated_tree_file(updated_tree_fp,tree)
# set the primary parameters for raxml
parameters['-w'] = abspath(output_dir)+'/'
parameters["-n"] = split(splitext(get_tmp_filename())[0])[-1]
parameters["-t"] = updated_tree_fp
if "-f" not in parameters:
parameters["-f"] = 'v'
if "-m" not in parameters:
parameters["-m"] = 'GTRGAMMA'
elif module=='pplacer':
try:
parameters=param_dict['pplacer']
except:
parameters={}
# make sure stats file is passed
if not opts.stats_fp:
raise IOError, \
'When using pplacer, the RAxML produced info file is required.'
# set the primary parameters for pplacer - allow for user-defined
parameters['--out-dir'] = abspath(output_dir)+'/'
parameters["-t"] = opts.starting_tree_fp
parameters['-r'] = opts.refseq_fp
parameters['-s'] = opts.stats_fp
elif module=='parsinsert':
try:
parameters=param_dict['parsinsert']
except:
parameters={}
# define log fp
log_fp=join(output_dir,'parsinsert.log')
# define tax assignment values fp
tax_assign_fp=join(output_dir,'parsinsert_assignments.log')
parameters["-l"] = log_fp
parameters["-o"] = tax_assign_fp
parameters["-s"] = opts.refseq_fp
parameters["-t"] = opts.starting_tree_fp
# call the module and return a tree object
result = \
tree_insertion_module_names[module].insert_sequences_into_tree(seqs,
moltype=DNA, params=parameters)
result_tree=strip_and_rename_unwanted_labels_from_tree(align_map,result)
# write out the resulting tree
final_tree=join(output_dir,'%s_final_placement.tre' % (module))
write_updated_tree_file(final_tree,result)
if __name__ == "__main__":
main()
|