File: readcov.py

package info (click to toggle)
python-sqt 0.8.0-9
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 824 kB
  • sloc: python: 5,964; sh: 38; makefile: 10
file content (57 lines) | stat: -rw-r--r-- 1,830 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
#!/usr/bin/env python3
"""
Print a report for individual reads in a SAM/BAM file.
"""
__author__ = 'Marcel Martin'

import sys
from collections import Counter, namedtuple, defaultdict
from itertools import islice
from contextlib import ExitStack
from pysam import Samfile
from sqt import HelpfulArgumentParser, Cigar, cigar
from sqt.region import Region

from .bamstats import AlignedRead, print_coverage_report

def add_arguments(parser):
	arg = parser.add_argument
	arg('--minimum-length', '-m', type=int, default=1,
		help='Minimum read length. Ignore reads that are shorter. Default: %(default)s')
	arg('--quality', '-q', type=int, default=0,
		help='Minimum mapping quality (default: %(default)s')
	arg('--minimum-cover-fraction', metavar='FRACTION', type=float, default=0.01,
		help='Alignment must cover at least FRACTION of the read to appear in the cover report. (%(default)s)')
	arg("bam", metavar="SAM/BAM", help="Name of a SAM or BAM file")
	arg("region", help="Region")


def main(args=None):
	if args is None:
		parser = HelpfulArgumentParser(description=__doc__)
		add_arguments(parser)
		args = parser.parse_args()
	region = Region(args.region)
	n_records = 0
	seen_reads = set()
	with Samfile(args.bam) as sf:
		for record in sf.fetch(region.reference, region.start, region.stop):
			if record.query_length < args.minimum_length:
				continue
			n_records += 1
			if record.is_unmapped:
				unmapped += 1
				unmapped_bases += len(record.seq)
				continue
			if record.mapq < args.quality:
				continue
			assert record.cigar is not None

			if not record.query_name in seen_reads:
				aligned_read = AlignedRead(record, sf.getrname(record.tid))
				print_coverage_report(aligned_read, minimum_cover_fraction=args.minimum_cover_fraction)
				seen_reads.add(record.query_name)


if __name__ == '__main__':
	main()