File: fastqDivide.py

package info (click to toggle)
pbsuite 15.8.24%2Bdfsg-2
  • links: PTS, VCS
  • area: main
  • in suites: stretch
  • size: 14,512 kB
  • ctags: 1,951
  • sloc: python: 10,962; sh: 147; xml: 21; makefile: 14
file content (59 lines) | stat: -rwxr-xr-x 1,606 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
#!/usr/bin/python
import sys, argparse
from pbsuite.utils.setupLogging import *

USAGE = "Split input.[fastq|fasta] file into N subfiles"

# Recognised (lower-case) input suffix -> number of lines per record.
# Every record is assumed to occupy exactly this many lines; wrapped
# (multi-line) sequences are not supported.
_LINES_PER_RECORD = {".fastq": 4, ".fasta": 2}


def _parse_args(argv=None):
    """Parse the command line (``sys.argv[1:]`` when *argv* is None)."""
    parser = argparse.ArgumentParser(
        description=USAGE,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument("input", metavar="INPUT",
                        help="File to be split")
    parser.add_argument("nSplits", metavar="N", type=int,
                        help="Number of splits to create")
    parser.add_argument("-o", "--outPrefix", default=None,
                        help="Prefix for output files")
    return parser.parse_args(argv)


def _detect_suffix(path):
    """Return the recognised suffix that *path* ends with, or None.

    The comparison is case-insensitive, but the returned suffix is always
    the lower-case form, which is what the output files are named with.
    """
    lowered = path.lower()
    for suffix in _LINES_PER_RECORD:
        if lowered.endswith(suffix):
            return suffix
    return None


def _distribute(in_handle, out_handles, lines_per_record):
    """Deal records from *in_handle* round-robin across *out_handles*.

    Reading stops when the first line of a record comes back empty (EOF).
    A truncated final record is written out as-is.
    """
    index = 0
    while True:
        record = [in_handle.readline() for _ in range(lines_per_record)]
        if record[0] == "":
            break
        out_handles[index].write("".join(record))
        index = (index + 1) % len(out_handles)


def main(argv=None):
    """Split a FASTA/FASTQ file into N files, one record at a time.

    Record ``k`` of the input goes to output file ``k % N``; the outputs
    are named ``<outPrefix><i><suffix>`` for ``i`` in ``0..N-1``.  When no
    prefix is given, the input path with its suffix removed is used.

    Parameters:
        argv -- list of command-line arguments; defaults to sys.argv[1:].

    Exits with status 1 (after logging an error) when the input does not
    end in .fastq/.fasta or when N is smaller than 1.
    """
    args = _parse_args(argv)

    suffix = _detect_suffix(args.input)
    if suffix is None:
        logging.error("Input must be .fastq or .fasta")
        # Non-zero status so callers/pipelines can detect the failure.
        sys.exit(1)

    if args.nSplits < 1:
        # With no output files there is nowhere to write the first record.
        logging.error("Number of splits must be at least 1")
        sys.exit(1)

    if args.outPrefix is None:
        # Strip the suffix from the END of the path (negative slice bound);
        # a positive bound would keep only the first few characters.
        args.outPrefix = args.input[:-len(suffix)]

    out_files = []
    try:
        # Open the input first so a missing/unreadable input does not
        # leave a set of empty output files behind.
        with open(args.input, 'r') as fh:
            for i in range(args.nSplits):
                out_files.append(
                    open(args.outPrefix + str(i) + suffix, 'w'))
            _distribute(fh, out_files, _LINES_PER_RECORD[suffix])
    finally:
        # Always release every handle that was opened, even on error.
        for f in out_files:
            f.close()


if __name__ == "__main__":
    main()