File: count_seqs.py

package info (click to toggle)
qiime 1.4.0-2
  • links: PTS, VCS
  • area: main
  • in suites: wheezy
  • size: 29,704 kB
  • sloc: python: 77,837; haskell: 379; sh: 113; makefile: 103
file content (92 lines) | stat: -rwxr-xr-x 3,178 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
#!/usr/bin/env python
# File created on 29 May 2011
from __future__ import division

# Module-level authorship, licensing and release metadata for this script.
# __version__ is reused below as script_info['version'].
__author__ = "Greg Caporaso"
__copyright__ = "Copyright 2011, The QIIME project"
__credits__ = ["Greg Caporaso"]
__license__ = "GPL"
__version__ = "1.4.0"
__maintainer__ = "Greg Caporaso"
__email__ = "gregcaporaso@gmail.com"
__status__ = "Release"
 

from glob import glob
from qiime.util import (parse_command_line_parameters, 
                        make_option, 
                        count_seqs_in_filepaths)

# Command-line interface definition. main() passes this dict as keyword
# arguments to parse_command_line_parameters().
script_info = {}
script_info['brief_description'] = \
 "Count the sequences in one or more fasta/fastq files"
script_info['script_description'] = \
 ("Report the number of sequences, and the mean and standard deviation of "
  "the sequence lengths, for each input file, followed by the total number "
  "of sequences across all input files.")
# Each usage entry is a (title, description, example command) tuple.
script_info['script_usage'] = [
 ("",
  "Count the sequences in a fasta file and write results to stdout.",
  "%prog -i in.fasta"),
 ("",
  "Count the sequences in a fasta file and a fastq file and write results to file. Note that fastq files can only be processed if they end with .fastq -- all other files are assumed to be fasta.",
  "%prog -i in1.fasta,in2.fastq -o seq_counts.txt"),
 ("",
  "Count the sequences in all .fasta files in current directory and write results to stdout. Note that -i option must be quoted.",
  "%prog -i \"*.fasta\"")]
script_info['output_description'] = \
 "Counts are written to stdout, or to the file specified with -o."
script_info['required_options'] = [
 # Comma-separated list of filepaths; main() expands each entry with glob(),
 # so wildcard patterns are accepted when quoted on the command line.
 make_option('-i','--input_fps',
        help='the input filepaths (comma-separated)'),
]
script_info['optional_options'] = [
 # When omitted, main() prints the report to stdout instead.
 make_option('-o','--output_fp',type="new_filepath",
  help='the output filepath [default: write to stdout]'),
 # Hides the "Some files were not accessible" section of the report.
 make_option('--suppress_errors',action='store_true',
        help='Suppress warnings about missing files [default: %default]',
        default=False)
]
script_info['version'] = __version__

def format_output(count_data, total, inaccessible_filepaths, suppress_errors=False):
    """ Build the plain-text sequence count report.

        count_data: iterable of entries where entry[0] holds
         (count, mean length, std of length) and entry[1] is the label
         printed next to them (the filepath, as used by main())
        total: total number of sequences, printed on the 'Total' line
        inaccessible_filepaths: filepaths that could not be read; listed
         in a trailing warning section when non-empty
        suppress_errors: if True, the warning section is omitted

        Returns the report as a single newline-joined string. The report
         starts with an empty line and, when the warning section is
         present, ends with one too.
    """
    per_file_fmt = \
     '%d  : %s (Sequence lengths (mean +/- std): %1.4f +/- %1.4f)'

    # The leading empty string yields a blank first line in the report.
    report = ['']
    report.extend([per_file_fmt %
                   (entry[0][0], entry[1], entry[0][1], entry[0][2])
                   for entry in count_data])
    report.append('%d  : Total' % total)

    # Nothing to warn about, or the caller asked for warnings to be hidden.
    if not inaccessible_filepaths or suppress_errors:
        return '\n'.join(report)

    report.append('')
    report.append(
     'Some files were not accessible. Do they exist? Do you have read permission?')
    report.extend(['  %s' % fp for fp in inaccessible_filepaths])
    report.append('')
    return '\n'.join(report)

def main():
    """ Command-line entry point: count sequences in the requested files.

        Parses the command line using script_info, expands every
         comma-separated -i entry with glob(), and passes the de-duplicated
         set of matching filepaths to count_seqs_in_filepaths(). The report
         built by format_output() is written to --output_fp when that
         option is given, otherwise it is printed to stdout.

        If no filepath matches any -i entry, the problem is reported via
         option_parser.error().
    """
    option_parser, opts, args =\
       parse_command_line_parameters(**script_info)

    # Each entry may be a literal path or a wildcard pattern; a set drops
    # files that are matched by more than one entry.
    input_fps = set()
    for pattern in opts.input_fps.split(','):
        input_fps.update(glob(pattern))
    if not input_fps:
        option_parser.error(
         "No filepaths match pattern(s) passed via -i: %s" % opts.input_fps)

    count_data, total, inaccessible_filepaths = \
     count_seqs_in_filepaths(input_fps)
    report = format_output(count_data, total, inaccessible_filepaths,
                           opts.suppress_errors)

    if opts.output_fp:
        # The with-block closes the file even if write() raises.
        with open(opts.output_fp, 'w') as output_f:
            output_f.write(report)
    else:
        # Parenthesised form prints the same string under the Python 2
        # print statement and also parses under Python 3.
        print(report)


# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()