File: BlasrIO.py

package info (click to toggle)
python-pbcore 1.7.1%2Bgit20200430.a127b1e%2Bdfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 5,404 kB
  • sloc: python: 23,243; xml: 2,504; makefile: 232; sh: 66
file content (92 lines) | stat: -rw-r--r-- 2,571 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
# Author: David Alexander

from pbcore.io.base import ReaderBase

# Names exported by a star-import of this module.  MalformattedRecord is
# defined below but is not listed here, so code that wants to catch it has
# to import it by name.
__all__ = ["M4Record",
           "M4Reader",
           "M5Record",
           "M5Reader"]


class MalformattedRecord(Exception):
    """
    Raised by the record parsers in this module when a line cannot be
    turned into a record; the offending line is the exception argument.
    """


class M4Record:
    """
    One alignment summary line as written by BLASR with the -m 4 option.

    Instances are normally built with :meth:`fromString`, which populates
    the attributes named in ``_COLUMNS`` from the whitespace-separated
    columns of the line, in that order.
    """

    # (attribute name, converter) for each column, in column order.
    # A converter of None stores the raw token unchanged.
    _COLUMNS = (
        ("qName", None),
        ("tName", None),
        ("score", int),
        ("percentSimilarity", float),
        ("qStrand", int),
        ("qStart", int),
        ("qEnd", int),
        ("qLength", int),
        ("tStrand", int),
        ("tStart", int),
        ("tEnd", int),
        ("tLength", int),
        ("mapQV", int),
    )

    @classmethod
    def fromString(cls, s):
        """
        Parse a single -m 4 line *s* and return the populated record.

        Columns beyond those listed in ``_COLUMNS`` are ignored.  Any
        failure while splitting or converting (too few columns, a
        non-numeric value, ...) is reported as ``MalformattedRecord(s)``.
        """
        record = cls()
        try:
            tokens = s.strip().split()
            for position, (attribute, convert) in enumerate(cls._COLUMNS):
                # Indexing (rather than zip) so a short line raises.
                token = tokens[position]
                value = token if convert is None else convert(token)
                setattr(record, attribute, value)
        except Exception:
            raise MalformattedRecord(s)
        return record


class M4Reader(ReaderBase):
    """
    Iterate over a BLASR -m 4 alignment summary file, one record per line.
    """

    def __iter__(self):
        """
        Lazily yield an ``M4Record`` for every line of ``self.file``.

        Each line is handed to ``M4Record.fromString``; whatever that
        raises for a line propagates to the consumer.
        """
        yield from map(M4Record.fromString, self.file)


class M5Record:
    """
    One alignment summary line as written by BLASR with the -m 5 option.

    Instances are normally built with :meth:`fromString`, which populates
    the attributes named in ``_COLUMNS`` from the whitespace-separated
    columns of the line, in that order.
    """

    # (attribute name, converter) for each column, in column order.
    # A converter of None stores the raw token unchanged.
    _COLUMNS = (
        ("qName", None),
        ("qLength", int),
        ("qStart", int),
        ("qEnd", int),
        ("qStrand", None),
        ("tName", None),
        ("tLength", int),
        ("tStart", int),
        ("tEnd", int),
        ("tStrand", None),
        ("score", float),
        ("numMatch", int),
        ("numMismatch", int),
        ("numIns", int),
        ("numDel", int),
        ("mapQV", int),
        ("qAlignedSeq", None),
        ("matchPattern", None),
        ("tAlignedSeq", None),
    )

    @classmethod
    def fromString(cls, s):
        """
        Parse a single -m 5 line *s* and return the populated record.

        Columns beyond those listed in ``_COLUMNS`` are ignored.  Any
        failure while splitting or converting (too few columns, a
        non-numeric value, ...) is reported as ``MalformattedRecord(s)``.
        """
        record = cls()
        try:
            tokens = s.strip().split()
            for position, (attribute, convert) in enumerate(cls._COLUMNS):
                # Indexing (rather than zip) so a short line raises.
                token = tokens[position]
                value = token if convert is None else convert(token)
                setattr(record, attribute, value)
        except Exception:
            raise MalformattedRecord(s)
        return record


class M5Reader(ReaderBase):
    """
    Iterate over a BLASR -m 5 alignment summary file, one record per line.
    """

    def __iter__(self):
        """
        Lazily yield an ``M5Record`` for every line of ``self.file``.

        Each line is handed to ``M5Record.fromString``; whatever that
        raises for a line propagates to the consumer.
        """
        yield from map(M5Record.fromString, self.file)