1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55
|
#!/usr/bin/env python3
"""
Guess quality encoding of one or more FASTQ files.
"""
import sys
import os
import subprocess
from collections import Counter
from sqt.io.fasta import FastqReader, guess_quality_base
from sqt import HelpfulArgumentParser
__author__ = "Marcel Martin"
def get_argument_parser():
    """Build the command-line parser for this script.

    The parser uses the module docstring as its description and defines:

    * ``--verbose`` / ``-v``: flag, off by default.
    * ``--limit`` / ``-n``: integer, 10000 by default.
    * ``fastq``: one or more positional input paths.

    Returns the configured ``HelpfulArgumentParser`` instance.
    """
    arg_parser = HelpfulArgumentParser(description=__doc__)
    arg_parser.add_argument(
        '--verbose', '-v',
        action='store_true',
        default=False,
        help='Print histogram of found characters',
    )
    arg_parser.add_argument(
        '--limit', '-n',
        type=int,
        default=10000,
        help='Inspect the first LIMIT records in the FASTQ file (default: %(default)s)',
    )
    arg_parser.add_argument(
        'fastq',
        metavar='FASTQ',
        nargs='+',
        help='Input FASTQ files (may be gzipped).',
    )
    return arg_parser
def main():
    """Guess and print the quality encoding of each FASTQ file on the command line.

    For every input path, ``guess_quality_base`` is called to obtain the
    observed quality characters and the guessed encoding offset.

    * Default mode prints a single line ``<path> is <encoding>``.
    * With ``--verbose``, prints a header, a histogram of the observed
      characters (character, ASCII code, count), the quality value range
      under both the phred33 and phred64 interpretations, and the guess.

    The guessed offset is reported as ``phred33``, ``phred64`` or ``unknown``.
    """
    parser = get_argument_parser()
    args = parser.parse_args()
    # NOTE(review): args.limit is parsed but never used in this function, so
    # --limit currently has no effect here. Confirm whether guess_quality_base
    # accepts a record limit and forward it if so.
    for path in args.fastq:
        if args.verbose:
            print('## File:', path)
        else:
            # No newline yet: the line is completed with " is <guess>" below.
            print(path, end='')
        freqs, guess = guess_quality_base(path)
        if args.verbose:
            print()
            print('character ASCII frequency')
            # Keys of freqs are character codes (they are passed to chr()).
            for c in sorted(freqs):
                print("{} {:3} {:7}".format(chr(c), c, freqs[c]))
            print()
        else:
            print(' is ', end='')
        guess = {33: 'phred33', 64: 'phred64', None: 'unknown'}[guess]
        if args.verbose:
            if freqs:
                lowest, highest = min(freqs), max(freqs)
                print("Quality value range assuming phred33: {}..{}".format(lowest - 33, highest - 33))
                print("Quality value range assuming phred64: {}..{}".format(lowest - 64, highest - 64))
            else:
                # min()/max() raise ValueError on an empty collection, which
                # happens when no quality characters were seen (e.g. an empty
                # input file). Report that instead of crashing.
                print("No quality characters found")
            print("This is probably", guess)
        else:
            print(guess)
# Run the command-line tool only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|