File: ParseBlastTable.py

package info (click to toggle)
python-biopython 1.68%2Bdfsg-3
  • links: PTS, VCS
  • area: main
  • in suites: stretch
  • size: 46,860 kB
  • ctags: 13,237
  • sloc: python: 160,306; xml: 93,216; ansic: 9,118; sql: 1,208; makefile: 155; sh: 63
file content (120 lines) | stat: -rw-r--r-- 3,717 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
# Copyright 2003 Iddo Friedberg. All rights reserved.
# This code is part of the Biopython distribution and governed by its
# license.  Please see the LICENSE file that should have been included
# as part of this package.

"""A parser for the NCBI blastpgp version 2.2.5 output format. Currently only supports
the '-m 9' option (table with annotations).
BlastTableReader returns one BlastTableRec instance per record.
"""

import sys


class BlastTableEntry(object):
    """One row of the table, converted into typed attributes.

    The row is split on whitespace and its first twelve columns are stored,
    in order, as:

    qid, sid -- columns 1 and 2, each split on '|' into a list of strings
    pid -- column 3 as a float
    ali_len, mis, gaps -- columns 4, 5 and 6 as ints
    q_bounds, s_bounds -- columns 7-8 and 9-10 as (int, int) tuples
    e_value, bit_score -- columns 11 and 12 as floats
    """

    def __init__(self, in_rec):
        """Parse in_rec, the text of a single table row.

        Raises IndexError when the row has fewer than twelve columns and
        ValueError when a numeric column cannot be converted.
        """
        columns = in_rec.split()

        def pipe_parts(position):
            # Identifier columns are kept as the list of '|'-separated parts.
            return columns[position].split('|')

        def as_int(position):
            return int(columns[position])

        def as_float(position):
            return float(columns[position])

        # Columns are converted strictly left to right, so the first bad
        # column is the one that raises.
        self.qid = pipe_parts(0)
        self.sid = pipe_parts(1)
        self.pid = as_float(2)
        self.ali_len = as_int(3)
        self.mis = as_int(4)
        self.gaps = as_int(5)
        self.q_bounds = (as_int(6), as_int(7))
        self.s_bounds = (as_int(8), as_int(9))
        self.e_value = as_float(10)
        self.bit_score = as_float(11)


class BlastTableRec(object):
    """Holder for one table record: its header values and its entries."""

    def __init__(self):
        """Create an empty record: header attributes None, no entries."""
        # Header values stay None until something assigns them.
        self.program = self.version = self.date = None
        self.iteration = self.query = self.database = None
        # Entries are kept in the order in which they were added.
        self.entries = []

    def add_entry(self, entry):
        """Append entry to the end of self.entries."""
        collected = self.entries
        collected.append(entry)

# Maps a keyword looked for on a '#' header line to the suffix of the
# BlastTableReader method ('_parse_<suffix>') that handles that line.
reader_keywords = dict([
    ('BLASTP', 'version'),
    ('Iteration', 'iteration'),
    ('Query', 'query'),
    ('Database', 'database'),
    ('Fields', 'fields'),
])


class BlastTableReader(object):
    """Read BlastTableRec records one at a time from a handle.

    The constructor skips forward to the first line containing 'BLASTP'.
    Each call to ``__next__`` (``next`` under Python 2) then parses one
    record: a run of '#' header lines followed by entry lines, ending at
    the first '#' line seen after the header is over (or at end of input).
    ``None`` is returned once the handle is exhausted.
    """

    def __init__(self, handle):
        """Store handle and advance to the start of the first record.

        handle -- object whose ``readline`` method returns lines of text
        and an empty string at end of input.
        """
        self.handle = handle
        inline = self.handle.readline()
        # zip forward to start of record
        while inline and 'BLASTP' not in inline:
            inline = self.handle.readline()
        # First line of the next record, already read from the handle.
        self._lookahead = inline
        # Number of times __next__ has been called.
        self._n = 0
        # Truthy while the '#' lines being read belong to the current header.
        self._in_header = 1

    def __next__(self):
        """Parse and return the next BlastTableRec, or None at end of input."""
        self.table_record = BlastTableRec()
        self._n += 1
        inline = self._lookahead
        if not inline:
            return None
        while inline:
            if not inline.strip():
                # A blank line is neither a header nor an entry; handing it
                # to BlastTableEntry would raise IndexError, so skip it.
                pass
            elif inline[0] == '#':
                if self._in_header:
                    self._in_header = self._consume_header(inline)
                else:
                    # A '#' line after the header has ended starts the next
                    # record; leave it in the lookahead for the next call.
                    break
            else:
                self._consume_entry(inline)
                self._in_header = 0

            inline = self.handle.readline()
        self._lookahead = inline
        self._in_header = 1
        return self.table_record

    if sys.version_info[0] < 3:
        def next(self):
            """Python 2 style alias for Python 3 style __next__ method."""
            return self.__next__()

    def _consume_entry(self, inline):
        """Parse one entry line and add it to the current record."""
        current_entry = BlastTableEntry(inline)
        self.table_record.add_entry(current_entry)

    def _consume_header(self, inline):
        """Dispatch a '#' header line to the matching ``_parse_*`` method.

        Returns the parser's result: truthy while the header continues,
        falsy once it is over.  A comment line that starts with none of the
        known keywords is ignored and the header is treated as continuing.
        """
        # The keyword must begin the comment text.  Matching it anywhere in
        # the line would let later text (for example a description or column
        # name that happens to contain 'Query' or 'Database') select the
        # wrong parser.
        content = inline.lstrip('#').lstrip()
        for keyword in reader_keywords:
            if content.startswith(keyword):
                return self._Parse('_parse_%s' % reader_keywords[keyword],
                                   inline)
        return 1

    def _parse_version(self, inline):
        """Store program, version and date from the 'BLASTP' line.

        The line must consist of exactly three whitespace-separated tokens
        after the leading '#'; otherwise the unpacking raises ValueError.
        """
        program, version, date = inline.split()[1:]
        self.table_record.program = program
        self.table_record.version = version
        self.table_record.date = date
        return 1

    def _parse_iteration(self, inline):
        """Store the third token of the line as the integer iteration."""
        self.table_record.iteration = int(inline.split()[2])
        return 1

    def _parse_query(self, inline):
        """Store every token from the third onward as the query (a list)."""
        self.table_record.query = inline.split()[2:]
        return 1

    def _parse_database(self, inline):
        """Store the third token of the line as the database."""
        self.table_record.database = inline.split()[2]
        return 1

    def _parse_fields(self, inline):
        """Signal that the header is over; the line itself is not stored."""
        return 0

    def _Parse(self, method_name, inline):
        """Call the method named method_name on inline and return its result."""
        return getattr(self, method_name)(inline)