File: generate_quality.py

package info (click to toggle)
spades 3.13.1+dfsg-2
  • links: PTS, VCS
  • area: main
  • in suites: bullseye, sid
  • size: 22,172 kB
  • sloc: cpp: 136,213; ansic: 48,218; python: 16,809; perl: 4,252; sh: 2,115; java: 890; makefile: 507; pascal: 348; xml: 303
file content (64 lines) | stat: -rw-r--r-- 1,844 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
############################################################################
# Copyright (c) 2015 Saint Petersburg State University
# All Rights Reserved
# See file LICENSE for details.
############################################################################

import re
import sys
import itertools
import sam_parser

pattern = re.compile('([0-9]*)([MIDNSHP])')

def parse(cigar, len, pos = 0):
    if cigar == "=" :
        for i in range(len):
            yield (i, i + pos)
        return
    if cigar == "X":
        return
    cur = 0
    curr = pos
    for n, c in pattern.findall(cigar):
        if n:
            n = int(n)
        else:
            n = 1
        if c == 'M':
            for i in range(n):
                yield (cur, curr)
                cur += 1
                curr += 1
        elif c == 'DPN':
            curr += n
        elif c in "IS":
            cur += n

def CollectQuality(contigs, sam):
    qual = [[[0,0] for i in range(len(contig))] for contig in contigs]
    for rec in sam:
        if rec.proper_alignment:
            for seq_pos, contig_pos in parse(rec.cigar, rec.alen, rec.pos - 1):
                if rec.seq[seq_pos] == contigs[rec.tid].seq[contig_pos]:
                    qual[rec.tid][contig_pos][1] += 1
                    qual[rec.tid][contig_pos][0] += ord(rec.qual[seq_pos])
    return qual

def CountContigQuality(contigs, qual):
    for i in range(len(contigs)):
        cnt = 0
        qual_list = [chr(33)] * len(contigs[i])
        for pos in range(len(contigs[i])):
            q = qual[i][pos]
            if q[1] != 0:
                qual_list[pos] = chr(q[0] // q[1])
            else:
                cnt += 1
        contigs[i].qual = "".join(qual_list)


def GenerateQuality(contigs, sam):
    qual = CollectQuality(contigs, sam)
    CountContigQuality(contigs, qual)