File: make_library_id_lists.py

package info (click to toggle)
qiime 1.4.0-2
  • links: PTS, VCS
  • area: main
  • in suites: wheezy
  • size: 29,704 kB
  • sloc: python: 77,837; haskell: 379; sh: 113; makefile: 103
file content (92 lines) | stat: -rwxr-xr-x 3,769 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
#!/usr/bin/env python
# File created on 09 Feb 2010
from __future__ import division

# Module metadata. __version__ is copied into script_info['version'] further
# down in this file.
__author__ = "Doug Wendel"
__copyright__ = "Copyright 2011, The QIIME Project"
__credits__ = ["Rob Knight", "Doug Wendel"]
__license__ = "GPL"
__version__ = "1.4.0"
__maintainer__ = "Kyle Bittinger"
__email__ = "kylebittinger@gmail.com"
__status__ = "Release"
 

from qiime.make_library_id_lists import get_first_id, get_ids
from qiime.util import parse_command_line_parameters
from qiime.util import make_option
from os.path import exists, join
from os import makedirs

# Script metadata and option definitions. The whole dict is passed as keyword
# arguments to parse_command_line_parameters() in main().
script_info = {}
script_info['brief_description'] = """Make library id lists"""
script_info['script_description'] = """Makes a list of the ids corresponding to each library represented in the input fasta file. Assumes that the libraries are the output of split_libraries.py and that they contain the 454 read id for each sequence as is standard in the split_libraries.py output. Produces a separate file for each library."""
script_info['script_usage'] = []
script_info['script_usage'].append(("""Example:""","""Create a list containing library ids for a fasta file (seqs.fna):""","""make_library_id_lists.py -i seqs.fna -o results/"""))
script_info['output_description'] = """This script produces a separate file for each library."""

script_info['required_options'] = [
    make_option("-i", "--input_fasta", dest='in_fasta', default=None,
        help="The path to a FASTA file containing input sequences"),
]

# Help strings keep a space before "[DEFAULT: ...]" so the rendered help does
# not run the description and the default value together.
script_info['optional_options'] = [
    # -s and -u work as a pair: main() raises if -s is given without -u.
    make_option("-s", "--screened_rep_seqs", dest="screened_rep_seqs",
        default=None,
        help="The path to a FASTA file containing screened representative "
        "seqs [DEFAULT: %default]"),
    make_option("-u", "--otus", dest="otus",
        default=None,
        help="The path to an OTU file mapping OTUs onto rep seqs "
        "[DEFAULT: %default]"),
    make_option("-o", "--outdir", dest='outdir',
        default='.',
        help="The base directory to save results (one file per library). "
        "[DEFAULT: %default]"),
    make_option("-f", "--field", dest="field", type=int,
        default=1,
        help="Index of space-delimited field to read id from [DEFAULT: %default]"),
    make_option("--debug", dest="debug", action="store_true",
        default=False, help="Show debug output."),
]
script_info['version'] = __version__

def _find_bad_ids(screened_rep_seqs_fp, otus_fp):
    """Return (bad_otu_ids, bad_seq_ids) for a screen against rep seqs.

    screened_rep_seqs_fp: path to the screened representative-seqs FASTA
        file; the ids get_first_id extracts from it are treated as the
        "bad" OTU ids.
    otus_fp: path to the OTU map. Each non-blank line is split on
        whitespace into an OTU id followed by the ids of its sequences.

    bad_seq_ids is the set of all sequence ids listed under a bad OTU id.
    """
    # The result is used with len() and repeated membership tests, so it is
    # expected to be a fully built collection by the time the file closes.
    with open(screened_rep_seqs_fp, 'U') as screened_f:
        bad_otu_ids = get_first_id(screened_f)

    bad_seq_ids = set()
    with open(otus_fp, 'U') as otus_f:
        for line in otus_f:
            fields = line.split()
            # A blank line yields no fields; skip it instead of failing
            # with an IndexError on fields[0].
            if fields and fields[0] in bad_otu_ids:
                bad_seq_ids.update(fields[1:])
    return bad_otu_ids, bad_seq_ids


def _write_id_lists(ids, outdir):
    """Write one '<library>.txt' file per key of ids into outdir.

    Each file holds the library's ids sorted and joined by newlines (no
    trailing newline). outdir is created first if it does not exist.
    """
    if not exists(outdir):
        makedirs(outdir)
    for lib_name, idlist in ids.items():
        # 'with' guarantees the handle is closed even if the write fails.
        with open(join(outdir, lib_name + '.txt'), 'w') as outfile:
            outfile.write('\n'.join(sorted(idlist)))


def main():
    """Parse the command line and write one id-list file per library.

    When --screened_rep_seqs is given, sequence ids belonging to the OTUs
    named in that file are collected (via the --otus map) and handed to
    get_ids as ids to exclude.

    Raises RuntimeError if --screened_rep_seqs is given without --otus.
    """
    # parse_command_line_parameters already returns the parsed options, so
    # the command line does not need to be parsed a second time.
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    if opts.debug:
        print("PRODUCING DEBUG OUTPUT")

    bad_seq_ids = set()
    bad_otu_ids = None

    #if we got a file to screen against, find the relevant ids and delete them
    if opts.screened_rep_seqs:
        # Validate before touching any file so a bad invocation fails fast.
        if not opts.otus:
            raise RuntimeError(
                "Must specify an OTU file if performing a screen.")
        bad_otu_ids, bad_seq_ids = _find_bad_ids(opts.screened_rep_seqs,
                                                 opts.otus)

    if opts.debug:
        if bad_otu_ids is not None:
            print("Found %s bad otu ids: %s" % (len(bad_otu_ids), bad_otu_ids))
        print("Found %s bad seq ids: %s" % (len(bad_seq_ids), bad_seq_ids))

    # NOTE(review): assumes get_ids reads the whole file before returning;
    # its result is used below as a dict-like mapping of library -> ids.
    with open(opts.in_fasta, 'U') as fasta_f:
        ids = get_ids(fasta_f, opts.field, bad_seq_ids, opts.debug)

    #add empty unassigned ids for file creation
    if 'Unassigned' not in ids:
        ids['Unassigned'] = []

    _write_id_lists(ids, opts.outdir)

# Run main() only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()