1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120
|
# Copyright 2003 Iddo Friedberg. All rights reserved.
# This code is part of the Biopython distribution and governed by its
# license. Please see the LICENSE file that should have been included
# as part of this package.
"""A parser for the NCBI blastpgp version 2.2.5 output format. Currently only supports
the '-m 9' option, (table w/ annotations).
Returns a BlastTableRec instance
"""
import sys
class BlastTableEntry(object):
    """One hit line of the table, split into typed attributes.

    The whitespace-separated columns of the line are stored as:

    - qid, sid: columns 0 and 1, each split on '|' into a list of strings
    - pid: column 2 as a float
    - ali_len, mis, gaps: columns 3, 4 and 5 as ints
    - q_bounds, s_bounds: columns 6-7 and 8-9 as (int, int) tuples
    - e_value, bit_score: columns 10 and 11 as floats

    NOTE(review): the attribute names suggest the usual '-m 9' column
    order (query id, subject id, % identity, alignment length, ...);
    confirm against the BLAST documentation.
    """

    def __init__(self, in_rec):
        """Parse a single table line given as the string in_rec.

        Raises IndexError when the line has fewer than 12 columns and
        ValueError when a numeric column cannot be converted.
        """
        columns = in_rec.split()

        # Identifiers are kept as the list of their '|'-separated parts.
        query_id = columns[0]
        self.qid = query_id.split('|')
        subject_id = columns[1]
        self.sid = subject_id.split('|')

        self.pid = float(columns[2])
        self.ali_len = int(columns[3])
        self.mis = int(columns[4])
        self.gaps = int(columns[5])

        # Start/end pairs are stored together as 2-tuples.
        q_start = int(columns[6])
        q_end = int(columns[7])
        self.q_bounds = (q_start, q_end)
        s_start = int(columns[8])
        s_end = int(columns[9])
        self.s_bounds = (s_start, s_end)

        self.e_value = float(columns[10])
        self.bit_score = float(columns[11])
class BlastTableRec(object):
    """Container for one parsed record: header values plus hit entries.

    All header attributes (program, version, date, iteration, query,
    database) start out as None; entries starts out as an empty list.
    """

    def __init__(self):
        """Create an empty record with no header values and no entries."""
        self.program = self.version = self.date = None
        self.iteration = self.query = self.database = None
        self.entries = []

    def add_entry(self, entry):
        """Append entry to the end of this record's entries list."""
        self.entries.append(entry)
# Maps the keyword that opens a '#' header line to the suffix of the
# BlastTableReader._parse_<suffix> method that handles that line.
reader_keywords = {'BLASTP': 'version',
                   'Iteration': 'iteration',
                   'Query': 'query',
                   'Database': 'database',
                   'Fields': 'fields'}


class BlastTableReader(object):
    """Read BlastTableRec records one at a time from an open handle.

    A record is a run of '#' header lines followed by hit lines. Each
    call to __next__ (next on Python 2) returns the next record, or
    None once the handle is exhausted.
    """

    def __init__(self, handle):
        """Store handle and skip ahead to the first line containing 'BLASTP'.

        handle must provide readline(). The line found (or '' at end of
        input) is kept as the lookahead for the first __next__ call.
        """
        self.handle = handle
        inline = self.handle.readline()
        # zip forward to start of record
        while inline and 'BLASTP' not in inline:
            inline = self.handle.readline()
        self._lookahead = inline
        # Number of times __next__ has been called.
        self._n = 0
        # Truthy while the header lines of a record are being read.
        self._in_header = 1

    def __next__(self):
        """Return the next record as a BlastTableRec, or None at end of input.

        Reading stops at the first '#' line seen after the header has
        ended; that line is saved as the lookahead for the next call.
        """
        self.table_record = BlastTableRec()
        self._n += 1
        inline = self._lookahead
        if not inline:
            return None
        while inline:
            if not inline.strip():
                # A whitespace-only line is neither a header nor a hit;
                # skip it instead of failing while parsing it as a hit.
                pass
            elif inline[0] == '#':
                if self._in_header:
                    self._in_header = self._consume_header(inline)
                else:
                    # A '#' line after the hits starts the next record.
                    break
            else:
                self._consume_entry(inline)
                self._in_header = 0
            inline = self.handle.readline()
        self._lookahead = inline
        self._in_header = 1
        return self.table_record

    if sys.version_info[0] < 3:
        def next(self):
            """Python 2 style alias for Python 3 style __next__ method."""
            return self.__next__()

    def _consume_entry(self, inline):
        """Parse inline as a hit line and add it to the current record."""
        current_entry = BlastTableEntry(inline)
        self.table_record.add_entry(current_entry)

    def _consume_header(self, inline):
        """Dispatch a '#' header line to the matching _parse_* method.

        Returns the parser's result: truthy while more header lines are
        expected, falsy once the header is finished. A header line that
        starts with none of the known keywords is ignored and leaves the
        reader in the header.
        """
        in_header = 1
        # Match the keyword only at the start of the header text. A
        # substring match would let the '# Fields: Query id, ...' line be
        # taken for a '# Query:' line and overwrite the parsed query.
        content = inline.lstrip('#').lstrip()
        for keyword in reader_keywords:
            if content.startswith(keyword):
                in_header = self._Parse('_parse_%s' % reader_keywords[keyword], inline)
                break
        return in_header

    def _parse_version(self, inline):
        """Store program, version and date from the three tokens after '#'.

        Raises ValueError if the line does not have exactly three such
        tokens.
        """
        program, version, date = inline.split()[1:]
        self.table_record.program = program
        self.table_record.version = version
        self.table_record.date = date
        return 1

    def _parse_iteration(self, inline):
        """Store the third token of the line as the integer iteration."""
        self.table_record.iteration = int(inline.split()[2])
        return 1

    def _parse_query(self, inline):
        """Store every token from the third onwards as the query (a list)."""
        self.table_record.query = inline.split()[2:]
        return 1

    def _parse_database(self, inline):
        """Store the third token of the line as the database name."""
        self.table_record.database = inline.split()[2]
        return 1

    def _parse_fields(self, inline):
        """Signal the end of the header; the line's content is not stored."""
        return 0

    def _Parse(self, method_name, inline):
        """Call the method named method_name on inline and return its result."""
        return getattr(self, method_name)(inline)
|