File: samreader.py

package info (click to toggle)
bowtie2 2.5.4-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 27,492 kB
  • sloc: cpp: 63,838; perl: 7,232; sh: 1,131; python: 987; makefile: 541; ansic: 122
file content (128 lines) | stat: -rwxr-xr-x 3,059 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
#!/usr/bin/python3
"""
A reader of SAM format.

    import samreader
    
    # ...
    # Attach it to stdin and print the header.
    sam_reader = SamReader(sys.stdin)
    print(sam_reader.header)
    
    # ...
    # Open a SAM file and store each read starting position.
    with open(myfile) as fp:
        sr = SamReader(fp)
        for rec in sr:
            start_coord[rec.qname] = rec.pos
            

"""


class CigarString(object):
    """
    Thin wrapper around a raw CIGAR string.

    The text handed to the constructor is stored unchanged in ``cigar``
    and ``set_idx`` is initialised to 0.  ``str()`` gives back the stored
    text.
    """

    def __init__(self, cigar):
        # Index starts at zero; the CIGAR text itself is kept verbatim.
        self.set_idx = 0
        self.cigar = cigar

    def __str__(self):
        raw_cigar = self.cigar
        return raw_cigar
        

class SamHeader(object):
    """
    Header section of a SAM stream.

    On construction the handle is rewound to offset 0 and every leading
    line that starts with '@' is collected (trailing whitespace stripped)
    into ``header_lines``.  The handle is then left positioned at the first
    non-header line and that offset is stored in ``end_header_pointer``.

    Iterating over the object yields the stored header lines in order.
    The iteration cursor (``curr_idx``) lives on the instance: every call
    to ``iter()`` resets it, so overlapping iterations share one cursor.
    """

    def __init__(self, file_handler):
        """
        file_handler -- text-mode file-like object supporting seek(),
                        tell() and readline().
        """
        self.header_lines = list()
        self.curr_idx = 0
        self._source_fh = file_handler
        self.end_header_pointer = self._load_header()

    def __iter__(self):
        # Restart from the first header line on every new iteration.
        self.curr_idx = 0
        return self

    def __next__(self):
        if self.curr_idx == len(self.header_lines):
            raise StopIteration

        value = self.header_lines[self.curr_idx]
        self.curr_idx += 1
        return value

    def __str__(self):
        """Return all header lines joined with newlines."""
        return "\n".join(self.header_lines)

    def _load_header(self):
        """
        Read the leading '@' lines into ``header_lines``.

        Returns the offset (as reported by tell()) of the first line that
        is not a header line; the handle is left positioned there.  For an
        empty or header-only input that offset is the end of the file.
        """
        fh = self._source_fh
        fh.seek(0)

        last_pos = fh.tell()
        line = fh.readline().rstrip()
        # startswith() is safe on an empty string, which is what we get at
        # end of file (empty or header-only input) or on a blank line;
        # indexing line[0] there would raise IndexError.
        while line.startswith('@'):
            self.header_lines.append(line)
            last_pos = fh.tell()
            line = fh.readline().rstrip()

        # Step back over the one non-header line that was read ahead.
        fh.seek(last_pos)
        return last_pos
    

class SamRecord(object):
    """
    One alignment line of a SAM file.

    The line is split on tabs and its first eleven columns are stored in
    ``record`` under the names listed in ``rec_keys``.  POS, MAPQ, PNEXT
    and TLEN are converted to int, CIGAR is wrapped in a CigarString and
    the remaining columns stay strings.  ``pos``, ``qname``, ``mapq``,
    ``rname`` and ``flag`` (as int) are additionally exposed as attributes.
    Columns beyond the eleventh are ignored.
    """

    def __init__(self, sam_line):
        columns = sam_line.rstrip().split('\t')
        # NOTE: optional fields are not handled for now.
        self.rec_keys = ['QNAME', 'FLAG', 'RNAME', 'POS', 'MAPQ', 'CIGAR',
                         'RNEXT', 'PNEXT', 'TLEN', 'SEQ', 'QUAL']

        # Explicit indexing so that a truncated line fails with IndexError.
        fields = {}
        for idx, key in enumerate(self.rec_keys):
            fields[key] = columns[idx]

        # Typed columns, converted in column order.
        for int_key in ('POS', 'MAPQ'):
            fields[int_key] = int(fields[int_key])
        fields['CIGAR'] = CigarString(fields['CIGAR'])
        for int_key in ('PNEXT', 'TLEN'):
            fields[int_key] = int(fields[int_key])

        self.record = fields
        self.pos = fields['POS']
        self.qname = fields['QNAME']
        self.mapq = fields['MAPQ']
        self.rname = fields['RNAME']
        # FLAG stays a string inside ``record``; only the attribute is int.
        self.flag = int(fields['FLAG'])

    def __str__(self):
        """Return the stored columns as one tab-separated line."""
        return "\t".join(str(self.record[key]) for key in self.rec_keys)
    

class SamReader(object):
    """
    Iterate over the alignment records of a SAM file handle.

    The header is parsed on construction and kept in ``header``.  Each new
    iteration repositions the handle right after the header and then
    yields one SamRecord per line read.
    """

    def __init__(self, file_handle):
        self._source_fh = file_handle
        self.header = SamHeader(file_handle)

    def __iter__(self):
        # Always restart from the first line that follows the header.
        records_start = self.header.end_header_pointer
        self._source_fh.seek(records_start)
        return self

    def __next__(self):
        raw = self._source_fh.readline()
        if raw:
            return SamRecord(raw)
        # readline() gives an empty string only once the input is exhausted.
        raise StopIteration