File: fastqSplit.py

package info (click to toggle)
pbsuite 15.8.24%2Bdfsg-2
  • links: PTS, VCS
  • area: main
  • in suites: stretch
  • size: 14,512 kB
  • ctags: 1,951
  • sloc: python: 10,962; sh: 147; xml: 21; makefile: 14
file content (71 lines) | stat: -rwxr-xr-x 1,943 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
#!/usr/bin/python

import sys
from optparse import OptionParser
from collections import namedtuple
from FileHandlers import wrap, qwrap, FastqFile
from StringIO import StringIO

# Help text handed to OptionParser(usage=...) in __parseArgs; optparse
# substitutes %prog with the name of the running script.
USAGE = """Usage: %prog <input.fastq> <baseName>
Splits a fastq into <baseName>.fasta and <baseName>.qual
Assumes Sanger Encoded Phred Scores in fastq
"""

def __parseArgs():
    """
    Read the command line and return the two positional arguments
    [input.fastq, baseName].

    The whole of sys.argv is given to the parser, so the program name is
    counted as the first positional; anything other than exactly two
    further arguments is reported through parser.error.
    """
    cmdParser = OptionParser(usage=USAGE)

    _options, positional = cmdParser.parse_args(sys.argv)
    # program name + input fastq + base name
    if len(positional) != 3:
        cmdParser.error('Expected 2 arguments')

    return positional[1:]

def fastqIter( fn ):
    """
    Generator over the records of the fastq file at path fn.

    Yields FastQEntry(name, seq, qual) namedtuples:
      name - the header line with its first character removed
      seq  - all sequence lines up to the '+' separator, joined
      qual - quality lines joined until their total length equals len(seq)

    Iteration ends at end of file or at the first header that is empty
    after stripping. If the file ends inside a record, or a blank line
    turns up before the quality length matches the sequence length, a
    message is written to stderr and the process exits with status 10.
    """
    FastQEntry = namedtuple("FastQEntry", "name seq qual")

    def badFile( name ):
        sys.stderr.write("Bad Fastq File: Last attempted entry = %s\n" % (name))
        sys.exit(10)

    # 'with' closes the handle once the generator is exhausted or discarded
    with open(fn, 'r') as fh:
        while True:
            name = fh.readline().strip()[1:]
            if name == "": break

            #seq grab
            seqParts = []
            raw = fh.readline()
            line = raw.strip()
            while not line.startswith('+'):#Assuming no name...
                # readline() gives "" only at end of file; without this
                # check a record missing its '+' line would loop forever
                if raw == "":
                    badFile(name)
                seqParts.append(line)
                raw = fh.readline()
                line = raw.strip()
            seq = "".join(seqParts)

            #qual grab - quality may be wrapped over several lines, so keep
            #reading until exactly as many characters as the sequence are seen
            qualParts = []
            curLen = 0
            while curLen != len(seq):
                line = fh.readline().strip()
                if line == "":
                    badFile(name)
                curLen += len(line)
                qualParts.append(line)

            yield FastQEntry(name, seq, "".join(qualParts))

def phredToQual( qual ):
    """
    Take a qual string that is phred/sanger encoded
    turn it into a list of quals

    Each character becomes ord(character) - 33. The result is always a
    real list (an empty one for an empty string), not a lazy iterator.
    """
    return [ord(char) - 33 for char in qual]
    
if __name__ == '__main__':
    # Script entry point: write every entry of the input fastq to
    # <baseName>.fasta (sequence) and <baseName>.qual (numeric qualities).
    fastqFn, baseName = __parseArgs()

    # The with blocks close both outputs even when an entry raises
    # part-way through, so whatever was written is flushed to disk.
    with open(baseName+".fasta", 'w') as fout:
        with open(baseName+".qual", 'w') as qout:
            # FastqFile is iterated by entry name and indexed by that name
            fastq = FastqFile(fastqFn)
            for name in fastq:
                entry = fastq[name]
                # wrap/qwrap come from FileHandlers; presumably they
                # line-wrap the sequence / quality values - confirm there
                fout.write(">%s\n%s\n" % (entry.name, wrap(entry.seq)))
                qout.write(">%s\n%s\n" % (entry.name, qwrap(phredToQual(entry.qual))))