File: split_otu_table.py

package info (click to toggle)
qiime 1.4.0-2
  • links: PTS, VCS
  • area: main
  • in suites: wheezy
  • size: 29,704 kB
  • sloc: python: 77,837; haskell: 379; sh: 113; makefile: 103
file content (89 lines) | stat: -rwxr-xr-x 4,065 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
#!/usr/bin/env python
# File created on 15 Jun 2011
from __future__ import division

# Module metadata: authorship, licensing and release information.
# __version__ is also reported as the script version via script_info below.
__author__ = "Greg Caporaso"
__copyright__ = "Copyright 2011, The QIIME project"
__credits__ = ["Greg Caporaso", "Antonio Gonzalez Pena"]
__license__ = "GPL"
__version__ = "1.4.0"
__maintainer__ = "Greg Caporaso"
__email__ = "gregcaporaso@gmail.com"
__status__ = "Release"
 

from os.path import split, splitext, join
from qiime.util import parse_command_line_parameters, make_option, create_dir
from qiime.parse import parse_mapping_file
from qiime.filter_by_metadata import filter_otus_and_map

# Description of this script's command line interface. main() passes this
# dict as keyword arguments to parse_command_line_parameters().
script_info = {}
script_info['brief_description'] = "Split a single OTU table into one OTU table per value in a specified field of the mapping file."
script_info['script_description'] = ""
script_info['script_usage'] = [("","Split otu_table.txt into per-study OTU tables, and store the results in ./per_study_otu_tables/","%prog -i otu_table.txt -m mapping.txt -f Study -o per_study_otu_tables")]
script_info['output_description']= ""
script_info['required_options'] = [
 make_option('-i','--otu_table_fp',type="existing_filepath",help='the input otu table'),
 make_option('-m','--mapping_fp',type="existing_filepath",help='the mapping file path'),
 make_option('-f','--mapping_field',help="mapping column to split otu table on"),
 make_option('-o','--output_dir',type="new_dirpath",help='the output directory'),
]
# Adjacent string literals are concatenated by the parser, so the long help
# texts need no explicit '+' or backslash continuations inside the call.
script_info['optional_options'] = [
 make_option('-c','--column_rename_ids',
                help='Mapping column used as sample id in the output files.'
                ' Has to be unique in the split samples. This option can be helpful to create otu tables'
                ' and mapping files for Procrustes analysis.',
                default=None),
 make_option('--include_repeat_cols',action='store_true',
                help='By default the new mapping files'
                ' will not have the columns that have the same information, to include them use this'
                ' option. This can be helpful to create mapping files for Procrustes analysis.',
                default=False),
]
script_info['version'] = __version__


def _get_field_index(option_parser, headers, field):
    """Return the position of field in headers, or report a usage error.

    option_parser: parser whose error() method is called when field is
     not one of headers. NOTE(review): this is presumably an optparse-style
     parser whose error() exits the program -- confirm against
     parse_command_line_parameters.
    headers: list of mapping file column names.
    field: column name supplied by the user (matched exactly).
    """
    try:
        return headers.index(field)
    except ValueError:
        option_parser.error("Field is not in mapping file (search is case "
         "and white-space sensitive). \n\tProvided field: "
         "%s. \n\tValid fields: %s" % (field,' '.join(headers)))


def main():
    """Write one OTU table and one mapping file per value of a mapping field.

    Reads the command line options described by script_info, collects the
    distinct values found in the --mapping_field column of the mapping
    file, creates the output directory and, for every distinct value,
    calls filter_otus_and_map to write:
      <output_dir>/<otu table base name>_<value>.txt
      <output_dir>/mapping_<value>.txt
    Spaces in the value are replaced by underscores in the file names.

    An unknown --mapping_field or --column_rename_ids is reported through
    option_parser.error().
    """
    option_parser, opts, args =\
       parse_command_line_parameters(**script_info)

    otu_table_fp = opts.otu_table_fp
    mapping_fp = opts.mapping_fp
    mapping_field = opts.mapping_field
    output_dir = opts.output_dir
    column_rename_ids = opts.column_rename_ids
    include_repeat_cols = opts.include_repeat_cols

    otu_table_base_name = splitext(split(otu_table_fp)[1])[0]

    # Close the mapping file as soon as it has been parsed.
    with open(mapping_fp,'U') as mapping_f:
        mapping_data, headers, comments = parse_mapping_file(mapping_f)

    field_index = _get_field_index(option_parser, headers, mapping_field)
    if column_rename_ids:
        # From here on column_rename_ids holds the column index rather than
        # the column name. The helper reports the rename column itself when
        # it is missing (previously the message named mapping_field).
        column_rename_ids = _get_field_index(option_parser,
                                             headers,
                                             column_rename_ids)

    mapping_values = set(e[field_index] for e in mapping_data)

    create_dir(output_dir)

    for v in mapping_values:
        v_fp_str = v.replace(' ','_')
        otu_table_output_fp = join(output_dir,
                                   '%s_%s.txt' % (otu_table_base_name, v_fp_str))
        mapping_output_fp = join(output_dir,'mapping_%s.txt' % v_fp_str)
        # Open all four files in a single with statement so that they are
        # closed (and the outputs flushed) after each value is processed,
        # even if filter_otus_and_map raises.
        with open(mapping_fp,'U') as mapping_f, \
             open(otu_table_fp,'U') as otu_table_f, \
             open(mapping_output_fp,'w') as mapping_out_f, \
             open(otu_table_output_fp,'w') as otu_table_out_f:
            filter_otus_and_map(mapping_f,
                                otu_table_f,
                                mapping_out_f,
                                otu_table_out_f,
                                valid_states_str="%s:%s" % (mapping_field,v),
                                num_seqs_per_otu=1,
                                include_repeat_cols=include_repeat_cols,
                                column_rename_ids=column_rename_ids)
    


# Run main() only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()