File: subsample_fasta.py

package info (click to toggle)
qiime 1.4.0-2
  • links: PTS, VCS
  • area: main
  • in suites: wheezy
  • size: 29,704 kB
  • sloc: python: 77,837; haskell: 379; sh: 113; makefile: 103
file content (62 lines) | stat: -rwxr-xr-x 1,964 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
#!/usr/bin/env python
from __future__ import division

# Module metadata: authorship, copyright, license, version and release status.
# __version__ is also stored in script_info['version'] further down.
__author__ = "William Walters"
__copyright__ = "Copyright 2011, The QIIME Project"
__credits__ = ["William Walters"]
__license__ = "GPL"
__version__ = "1.4.0"
__maintainer__ = "William Walters"
__email__ = "william.a.walters@gmail.com"
__status__ = "Release"

from os.path import split, splitext

from qiime.util import parse_command_line_parameters, get_options_lookup,\
 make_option, subsample_fasta

options_lookup = get_options_lookup()

# script_info is unpacked as keyword arguments into
# parse_command_line_parameters() by main(); it carries the descriptive text
# and the option definitions for this script.
script_info = {}
script_info['brief_description'] = """Randomly subsample sequences from a given fasta file"""
script_info['script_description'] = """Write a random subsample of the sequences in a fasta file to a new fasta file. The size of the subsample is given with -p as a fraction of the input: a value greater than 0 and no greater than 1, so 0.05 selects 5% of the sequences."""
# Usage entries are three-item tuples; here: heading, explanation, command.
script_info['script_usage'] = [
    ("""Example:""",
     """Subsample the seqs.fasta file, randomly select 5% of the sequences:""",
     """ subsample_fasta.py -i seqs.fasta -p 0.05"""),
]
script_info['output_description'] = """The subsampled sequences are written to the output filepath. If no output filepath is given, the file is created in the current working directory and named after the input file: <input name>_subsample_<fraction><input extension> (for example seqs_subsample_0.05.fasta), with the fraction shown to two decimal places."""
script_info['required_options'] = [
    options_lookup['fasta_as_primary_input'],
    # The option keeps its historical name, but main() validates the value
    # as a fraction in (0, 1], so the help text says so explicitly.
    make_option('-p', '--percent_subsample', action='store', type='float',
                help='Specify the fraction of sequences to subsample, as a '
                     'value greater than 0 and no greater than 1 '
                     '(e.g. 0.05 for 5 percent)'),
]
script_info['optional_options'] = [
    options_lookup['output_fp'],
]
script_info['version'] = __version__


def main():
    """Parse the command line and subsample the input fasta file.

    Reads the input fasta filepath, the optional output filepath and the
    subsample fraction from the parsed options, validates the fraction,
    builds a default output filename when none was supplied, and passes
    all three to subsample_fasta.

    Raises:
        ValueError: if percent_subsample is not greater than 0 and at
            most 1 (NaN is rejected as well).
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    input_fasta_fp = opts.input_fasta_fp
    output_fp = opts.output_fp
    percent_subsample = opts.percent_subsample

    # Despite the option name, the value is a fraction: 0 is excluded and 1
    # is allowed. The negated chained comparison also rejects NaN, which
    # fails every comparison and would slip past "x > 1 or x <= 0".
    if not 0 < percent_subsample <= 1:
        # Call form of raise: valid on both Python 2 and Python 3.
        raise ValueError('percent_subsample must be in range of 0-1')

    if not output_fp:
        # The default name uses only the file-name part of the input path
        # (the directory is dropped), so the result is a relative path,
        # e.g. seqs.fasta with 0.05 -> seqs_subsample_0.05.fasta
        input_file_basename, input_file_ext = splitext(
            split(input_fasta_fp)[1])
        output_fp = '%s_subsample_%3.2f%s' % (
            input_file_basename, percent_subsample, input_file_ext)

    subsample_fasta(input_fasta_fp, output_fp, percent_subsample)
         

        


# Run main() only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()