File: readSummary.py

package info (click to toggle)
pbsuite 15.8.24%2Bdfsg-3
  • links: PTS, VCS
  • area: main
  • in suites: buster
  • size: 14,512 kB
  • sloc: python: 10,962; sh: 147; xml: 21; makefile: 14
file content (27 lines) | stat: -rwxr-xr-x 1,021 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
#!/usr/bin/python

import argparse, json
from pbsuite.jelly.Jelly import JellyProtocol
from pbsuite.utils.FileHandlers import FastaFile, FastqFile
from pbsuite.utils.summarizeAssembly import getStats

USAGE = """Get statistics on fasta/fastq sequences recorded in a Protocol.xml"""

if __name__ == '__main__':
    # Local import: only this command-line entry point needs sys (used to
    # report skipped inputs on stderr).
    import sys

    # Command line: a single positional argument, the path to Protocol.xml.
    parser = argparse.ArgumentParser(
        description=USAGE,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument("xml", metavar="XML", type=str,
                        help="Protocol.xml with inputs listed")
    args = parser.parse_args()

    protocol = JellyProtocol(args.xml)

    # Collect the length of every sequence found in every input listed by
    # the protocol; the reader is chosen from the file name suffix.
    seqLengths = []
    for inputPath in protocol.inputs:
        if inputPath.endswith(".fasta"):
            # Values of a FastaFile are measured directly with len().
            seqLengths.extend(
                len(seq) for seq in FastaFile(inputPath).values())
        elif inputPath.endswith(".fastq"):
            # Values of a FastqFile expose their bases through `.seq`.
            seqLengths.extend(
                len(entry.seq) for entry in FastqFile(inputPath).values())
        else:
            # Previously such inputs were dropped silently, so the reported
            # statistics could cover only part of the data without any hint.
            sys.stderr.write(
                "Skipping input with unrecognized extension: %s\n"
                % inputPath)

    # A single parenthesised expression prints identically whether `print`
    # is the Python 2 statement or the Python 3 function.
    print("Read Stats " + json.dumps(getStats(seqLengths), indent=4))