File: split_otu_table.py

package info (click to toggle)
qiime 1.8.0%2Bdfsg-4
  • links: PTS, VCS
  • area: main
  • in suites: jessie, jessie-kfreebsd
  • size: 130,508 kB
  • ctags: 10,145
  • sloc: python: 110,826; haskell: 379; sh: 169; makefile: 125
file content (75 lines) | stat: -rwxr-xr-x 3,566 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
#!/usr/bin/env python
# File created on 15 Jun 2011
from __future__ import division

__author__ = "Greg Caporaso"
__copyright__ = "Copyright 2011, The QIIME project"
__credits__ = ["Greg Caporaso", "Antonio Gonzalez Pena"]
__license__ = "GPL"
__version__ = "1.8.0"
__maintainer__ = "Greg Caporaso"
__email__ = "gregcaporaso@gmail.com"
 

from os.path import split, splitext, join
from numpy import inf
from qiime.util import parse_command_line_parameters, make_option, create_dir
from qiime.parse import parse_mapping_file
from qiime.split import split_mapping_file_on_field, split_otu_table_on_sample_metadata

# Command-line interface description; main() passes this dict as keyword
# arguments to parse_command_line_parameters().
script_info = {}
script_info['brief_description'] = "Split a single OTU table into one OTU table per value in a specified field of the mapping file."
script_info['script_description'] = ""
script_info['script_usage'] = [("","Split otu_table.biom into per-study OTU tables, and store the results in ./per_study_otu_tables/","%prog -i otu_table.biom -m Fasting_Map.txt -f Treatment -o per_study_otu_tables")]
script_info['output_description']= ""
# All four options are required: input OTU table, mapping file, the mapping
# column to split on, and the directory that receives the split files.
script_info['required_options'] = [
 make_option('-i','--otu_table_fp',type="existing_filepath",help='the input otu table'),
 make_option('-m','--mapping_fp',type="existing_filepath",help='the mapping file path'),
 make_option('-f','--mapping_field',type='string',help="mapping column to split otu table on"),
 make_option('-o','--output_dir',type="new_dirpath",help='the output directory'),
]
script_info['optional_options'] = [
 # The options below are disabled because of a known issue, see
 # https://github.com/qiime/qiime/issues/417
 # and https://github.com/qiime/qiime/issues/941
 # make_option('-c','--column_rename_ids',type='string',help='Mapping column used as sample id in the output files.' +\
 #                ' Has to be unique in the split samples. This option can be helpful to create otu tables' +
 #                ' and mapping files for Procrustes analysis.', default=None),
 # make_option('--include_repeat_cols',action='store_true', help='By default the new mapping files' +\
 #                ' will not have the columns that have the same information, to include them use this' +\
 #                ' option. This can be helpful to create mapping files for Procrustes analysis.',
 #                default=False)
]
script_info['version'] = __version__


def main():
    """Split a mapping file and an OTU table on one mapping-file column.

    Command-line options are parsed according to ``script_info``. The
    output directory is created, and then two sets of files are written
    into it:

    * ``mapping_<value>.txt`` -- one file per item yielded by
      ``split_mapping_file_on_field``.
    * ``<otu_table_base_name>_<value>.biom`` -- one file per item yielded
      by ``split_otu_table_on_sample_metadata``, where
      ``<otu_table_base_name>`` is the input OTU table's file name with
      its directory and extension removed.

    Every input and output file is opened in a ``with`` block so that it
    is flushed and closed deterministically, even if a splitter raises
    part-way through.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    otu_table_fp = opts.otu_table_fp
    mapping_fp = opts.mapping_fp
    mapping_field = opts.mapping_field
    output_dir = opts.output_dir
    # column_rename_ids = opts.column_rename_ids
    # include_repeat_cols = opts.include_repeat_cols

    create_dir(output_dir)

    # split mapping file
    # The loop stays inside the `with` so the input handle is still open
    # while the splitter is being consumed.
    with open(mapping_fp, 'U') as mapping_f:
        for fp_str, sub_mapping_s in split_mapping_file_on_field(mapping_f,
                                                                 mapping_field):
            mapping_output_fp = join(output_dir, 'mapping_%s.txt' % fp_str)
            with open(mapping_output_fp, 'w') as mapping_output_f:
                mapping_output_f.write(sub_mapping_s)

    # split otu table
    otu_table_base_name = splitext(split(otu_table_fp)[1])[0]
    # The mapping file is reopened so the OTU-table splitter reads it from
    # the beginning.
    with open(mapping_fp, 'U') as mapping_f:
        with open(otu_table_fp, 'U') as otu_table_f:
            for fp_str, sub_otu_table_s in split_otu_table_on_sample_metadata(
                    otu_table_f, mapping_f, mapping_field):
                otu_table_output_fp = join(
                    output_dir,
                    '%s_%s.biom' % (otu_table_base_name, fp_str))
                with open(otu_table_output_fp, 'w') as otu_table_output_f:
                    otu_table_output_f.write(sub_otu_table_s)


# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()