File: filter_tree.py

package info (click to toggle)
qiime 1.4.0-2
  • links: PTS, VCS
  • area: main
  • in suites: wheezy
  • size: 29,704 kB
  • sloc: python: 77,837; haskell: 379; sh: 113; makefile: 103
file content (118 lines) | stat: -rwxr-xr-x 3,933 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
#!/usr/bin/env python
# File created on 18 Jun 2011
from __future__ import division

__author__ = "William Van Treuren"
__copyright__ = "Copyright 2011, The QIIME project"
__credits__ = ["William Van Treuren","Greg Caporaso", "Daniel McDonald","Justin Kuczynski"]
__license__ = "GPL"
__version__ = "1.4.0"
__maintainer__ = "William Van Treuren"
__email__ = "vantreur@colorado.edu"
__status__ = "Release"


from cogent.parse.tree import DndParser
from qiime.filter import (filter_fasta, negate_tips_to_keep,
                          get_seqs_to_keep_lookup_from_seq_id_file,
                          get_seqs_to_keep_lookup_from_fasta_file)
from qiime.util import parse_command_line_parameters, make_option

script_info = {}
script_info['brief_description'] = """This script prunes a tree based on a set of tip names"""
                                      
script_info['script_description'] = """This script takes a tree and a list of OTU IDs (in one of several supported formats) and outputs a subtree retaining only the tips on the tree which are found in the inputted list of OTUs (or not found, if the --negate option is provided)."""
    
script_info['script_usage'] = []
script_info['script_usage'].append(("""Prune a tree to include only the tips in tips_to_keep.txt""",\
    """""",\
    """%prog -i rep_seqs.tre -t tips_to_keep.txt -o rep_seqs_subtree.tre"""))
script_info['script_usage'].append(("""Prune a tree to remove the tips in tips_to_remove.txt. Note that the -n/--negate option must be passed for this functionality.""",\
    """""",\
    """%prog -i rep_seqs.tre -t tips_to_remove.txt -o rep_seqs_subtree.tre -n"""))

script_info['output_description'] = \
    """Output is a pruned tree in newick format."""

script_info['required_options']=[\
 make_option('-i',
  '--input_tree_filepath',
  action='store',
  type='existing_filepath',
  dest='input_tree_fp',     
  help='input tree filepath'),

 make_option('-o',
  '--output_tree_filepath',
  action='store',
  type='new_filepath',
  dest='output_tree_fp',
  help='output tree filepath'),\
]

script_info['optional_options']=[\
 make_option('-n',
  '--negate',
  default=False,
  action='store_true',
  help='if negate is not false will prune tips fed in and save \
   all others [default: %default]'),

 make_option('-t',
  '--tips_fp',
  action='store',
  type='existing_filepath',
 help='A list of sequence identifiers (or tab-delimited lines with \
  a seq identifier in the first field) which should be retained \
  [default: %default]'),

 make_option('-f',
 '--fasta_fp',
 action='store',
 type='existing_filepath',
 help='A fasta file where the seq ids should be retained'
                 ' [default: %default]'),\
]

script_info['version'] = __version__

def main():
    
    option_parser, opts, args = parse_command_line_parameters(**script_info)
    input_tree_fp = opts.input_tree_fp
    tips_fp = opts.tips_fp
    fasta_fp = opts.fasta_fp
    output_tree_fp = opts.output_tree_fp
    
    if tips_fp != None:
        tips_to_keep = get_seqs_to_keep_lookup_from_seq_id_file(open(tips_fp,'U'))
    elif fasta_fp != None:
        tips_to_keep = get_seqs_to_keep_lookup_from_fasta_file(open(fasta_fp,'U'))
    else:
        option_parser.error("Must provide either -t or -f.")
    
    tree = DndParser(open(input_tree_fp,'U'))
    
    if opts.negate:
        tips_to_keep = negate_tips_to_keep(tips_to_keep, tree)

    t2 = tree.copy()
    wanted = tips_to_keep
    def delete_test(node):
        if node.istip() and node.Name not in wanted:
            return True
        return False
    t2.removeDeleted(delete_test)
    t2.prune()
    tree_out = t2

    ## don't use this, it doesn't eliminate tips!
    # tree_out = tree.getSubTree(tips_to_keep,ignore_missing=True)
   
    tree_out.writeToFile(output_tree_fp)

if __name__ == "__main__":
    # this comes in handy sometimes
    # import sys
    # sys.setrecursionlimit(10000)
    main()