File: filter_tree.py

package info (click to toggle)
qiime 1.8.0%2Bdfsg-4
  • links: PTS, VCS
  • area: main
  • in suites: jessie, jessie-kfreebsd
  • size: 130,508 kB
  • ctags: 10,145
  • sloc: python: 110,826; haskell: 379; sh: 169; makefile: 125
file content (110 lines) | stat: -rwxr-xr-x 3,874 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
#!/usr/bin/env python
# File created on 18 Jun 2011
from __future__ import division

__author__ = "William Van Treuren"
__copyright__ = "Copyright 2011, The QIIME project"
__credits__ = ["William Van Treuren","Greg Caporaso", "Daniel McDonald","Justin Kuczynski"]
__license__ = "GPL"
__version__ = "1.8.0"
__maintainer__ = "William Van Treuren"
__email__ = "vantreur@colorado.edu"


from cogent.parse.tree import DndParser
from qiime.filter import (filter_fasta, negate_tips_to_keep,
                          get_seqs_to_keep_lookup_from_seq_id_file,
                          get_seqs_to_keep_lookup_from_fasta_file,
                          filter_tree)
from qiime.util import parse_command_line_parameters, make_option

# Script metadata and command-line option definitions. main() passes this
# dict as keyword arguments to parse_command_line_parameters().
script_info = {}
script_info['brief_description'] = """This script prunes a tree based on a set of tip names"""

script_info['script_description'] = """This script takes a tree and a list of OTU IDs (in one of several supported formats) and outputs a subtree retaining only the tips on the tree which are found in the inputted list of OTUs (or not found, if the --negate option is provided)."""

# Each usage entry is a 3-tuple of strings; presumably
# (title, longer description, example command) -- the middle field is
# left empty for every example here.
# NOTE(review): the second example's text mentions tips_to_remove.txt while
# its command passes tips_to_keep.txt -- confirm which file name is intended.
script_info['script_usage'] = [
    ("""Prune a tree to include only the tips in tips_to_keep.txt""",
     """""",
     """%prog -i rep_seqs.tre -t tips_to_keep.txt -o pruned.tre"""),
    ("""Prune a tree to remove the tips in tips_to_remove.txt. Note that the -n/--negate option must be passed for this functionality""",
     """""",
     """%prog -i rep_seqs.tre -t tips_to_keep.txt -o negated.tre -n"""),
    ("""Prune a tree to include only the tips found in the fasta file provided""",
     """""",
     """%prog -i rep_seqs.tre -f fast_f.fna -o pruned_fast.tre"""),
]

script_info['output_description'] = \
    """Output is a pruned tree in newick format."""

script_info['required_options'] = [
    make_option('-i',
                '--input_tree_filepath',
                action='store',
                type='existing_filepath',
                dest='input_tree_fp',
                help='input tree filepath'),

    make_option('-o',
                '--output_tree_filepath',
                action='store',
                type='new_filepath',
                dest='output_tree_fp',
                help='output tree filepath'),
]

# Help strings below are built with implicit string concatenation rather than
# backslash continuations inside the literal, so that source indentation does
# not leak into the displayed help text as runs of spaces.
script_info['optional_options'] = [
    make_option('-n',
                '--negate',
                default=False,
                action='store_true',
                help='if negate is True will remove input tips/seqs, if '
                     'negate is False, will retain input tips/seqs '
                     '[default: %default]'),

    make_option('-t',
                '--tips_fp',
                action='store',
                type='existing_filepath',
                help='A list of tips (one tip per line) or sequence '
                     'identifiers (tab-delimited lines with a seq identifier '
                     'in the first field) which should be retained '
                     '[default: %default]'),

    make_option('-f',
                '--fasta_fp',
                action='store',
                type='existing_filepath',
                help='A fasta file where the seq ids should be retained'
                     ' [default: %default]'),
]

script_info['version'] = __version__

def main():
    """Prune the input tree to a set of tips and write the result.

    Parses the command line described by ``script_info``, builds the set of
    tip names from either the tips file (-t) or the fasta file (-f), parses
    the input tree, optionally negates the tip set against the tree (-n),
    filters the tree and writes it to the output path.

    If both -t and -f are supplied, -t is used and -f is ignored. If neither
    is supplied, the option parser reports an error.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    # Input files are opened in ``with`` blocks so the handles are closed as
    # soon as parsing finishes. This relies on each parser consuming its input
    # before returning -- NOTE(review): confirm none of them is lazy.
    if opts.tips_fp is not None:
        with open(opts.tips_fp, 'U') as tips_f:
            tips_to_keep = get_seqs_to_keep_lookup_from_seq_id_file(tips_f)
    elif opts.fasta_fp is not None:
        with open(opts.fasta_fp, 'U') as fasta_f:
            tips_to_keep = get_seqs_to_keep_lookup_from_fasta_file(fasta_f)
    else:
        option_parser.error("Must provide either -t or -f.")

    with open(opts.input_tree_fp, 'U') as tree_f:
        tree = DndParser(tree_f)

    # With --negate the supplied tips are the ones to drop, so the keep-set
    # is recomputed relative to the parsed tree.
    if opts.negate:
        tips_to_keep = negate_tips_to_keep(tips_to_keep, tree)

    filtered_tree = filter_tree(tree, tips_to_keep)
    filtered_tree.writeToFile(opts.output_tree_fp)

if __name__ == "__main__":
    # Entry point when the file is executed as a script.
    # Disabled workaround kept for reference: raising the interpreter's
    # recursion limit before calling main().
    # NOTE(review): presumably needed for very deep trees -- confirm before
    # enabling.
    # import sys
    # sys.setrecursionlimit(10000)
    main()