1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64
|
############################################################################
# Copyright (c) 2015 Saint Petersburg State University
# All Rights Reserved
# See file LICENSE for details.
############################################################################
import re
import sys
import itertools
import sam_parser
pattern = re.compile('([0-9]*)([MIDNSHP])')
def parse(cigar, len, pos = 0):
if cigar == "=" :
for i in range(len):
yield (i, i + pos)
return
if cigar == "X":
return
cur = 0
curr = pos
for n, c in pattern.findall(cigar):
if n:
n = int(n)
else:
n = 1
if c == 'M':
for i in range(n):
yield (cur, curr)
cur += 1
curr += 1
elif c == 'DPN':
curr += n
elif c in "IS":
cur += n
def CollectQuality(contigs, sam):
qual = [[[0,0] for i in range(len(contig))] for contig in contigs]
for rec in sam:
if rec.proper_alignment:
for seq_pos, contig_pos in parse(rec.cigar, rec.alen, rec.pos - 1):
if rec.seq[seq_pos] == contigs[rec.tid].seq[contig_pos]:
qual[rec.tid][contig_pos][1] += 1
qual[rec.tid][contig_pos][0] += ord(rec.qual[seq_pos])
return qual
def CountContigQuality(contigs, qual):
for i in range(len(contigs)):
cnt = 0
qual_list = [chr(33)] * len(contigs[i])
for pos in range(len(contigs[i])):
q = qual[i][pos]
if q[1] != 0:
qual_list[pos] = chr(q[0] // q[1])
else:
cnt += 1
contigs[i].qual = "".join(qual_list)
def GenerateQuality(contigs, sam):
qual = CollectQuality(contigs, sam)
CountContigQuality(contigs, qual)
|