File: ParseBlastTable.py

package info (click to toggle)
python-biopython 1.42-2
  • links: PTS
  • area: main
  • in suites: etch, etch-m68k
  • size: 17,584 kB
  • ctags: 12,272
  • sloc: python: 80,461; xml: 13,834; ansic: 7,902; cpp: 1,855; sql: 1,144; makefile: 203
file content (104 lines) | stat: -rw-r--r-- 3,467 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
# Copyright 2003 Iddo Friedberg. All rights reserved.
# This code is part of the Biopython distribution and governed by its
# license.  Please see the LICENSE file that should have been included
# as part of this package.

import string
"""A parser for the NCBI blastpgp version 2.2.5 output format. Currently only supports
the '-m 9' option (table with annotations).
BlastTableReader.next() returns a BlastTableRec instance for each record.
"""

class BlastTableEntry:
   """One data row (hit line) of a BLAST '-m 9' table.

   The row is split on whitespace and the first twelve columns are stored
   as attributes:

      qid        column 0, split on '|' into a list of strings
      sid        column 1, split on '|' into a list of strings
      pid        column 2 as a float
      ali_len    column 3 as an int
      mis        column 4 as an int
      gaps       column 5 as an int
      q_bounds   (column 6, column 7) as a tuple of ints
      s_bounds   (column 8, column 9) as a tuple of ints
      e_value    column 10 as a float
      bit_score  column 11 as a float

   NOTE(review): the attribute names suggest the usual tabular column order
   (query id, subject id, % identity, alignment length, mismatches, gap
   openings, query start/end, subject start/end, e-value, bit score) --
   confirm against the BLAST version in use.

   Raises IndexError if the row has fewer than twelve columns and
   ValueError if a numeric column cannot be converted.
   """
   def __init__(self,in_rec):
      bt_fields = in_rec.split()
      self.qid = bt_fields[0].split('|')
      self.sid = bt_fields[1].split('|')
      # The built-in float()/int() replace string.atof()/string.atoi(),
      # which no longer exist in Python 3; the conversions are the same.
      self.pid = float(bt_fields[2])
      self.ali_len = int(bt_fields[3])
      self.mis = int(bt_fields[4])
      self.gaps = int(bt_fields[5])
      self.q_bounds = (int(bt_fields[6]), int(bt_fields[7]))
      self.s_bounds = (int(bt_fields[8]), int(bt_fields[9]))
      self.e_value = float(bt_fields[10])
      self.bit_score = float(bt_fields[11])
      
class BlastTableRec:
   """Container for one table record: header values plus a list of entries.

   Every header attribute (program, version, date, iteration, query,
   database) starts out as None; entries starts out as an empty list.
   """
   def __init__(self):
      # Header attributes are unset until something assigns them.
      self.program = self.version = self.date = None
      self.iteration = self.query = self.database = None
      # Each instance gets its own, initially empty, list.
      self.entries = list()
   def add_entry(self, entry):
      """Append entry to the end of self.entries."""
      collected = self.entries
      collected.append(entry)

# Maps a keyword found on a '#' header line to the suffix of the
# BlastTableReader._parse_<suffix> method that handles that line.
reader_keywords = {'BLASTP': 'version',
                   'Iteration': 'iteration',
                   'Query': 'query',
                   'Database': 'database',
                   'Fields': 'fields'}
class BlastTableReader:
   """Read BlastTableRec records one at a time from a file-like handle.

   Lines starting with '#' are treated as header lines and every other
   non-blank line as a table entry. A record is a run of header lines
   followed by its entry lines; the next '#' line seen after at least one
   entry starts the following record.
   """
   def __init__(self, handle):
      """Store handle and skip forward to the first line containing 'BLASTP'.

      handle must provide readline(). The line found (or the empty string
      at end of input) is kept as the look-ahead line for next().
      """
      self.handle = handle
      inline = self.handle.readline()
      # zip forward to start of record
      while inline and inline.find('BLASTP') == -1:
         inline = self.handle.readline()
      self._lookahead = inline
      self._n = 0           # number of times next() has been called
      self._in_header = 1   # 1 while header lines are still expected
   def next(self):
      """Parse and return the next record, or None when input is exhausted.

      The returned BlastTableRec is also kept as self.table_record.
      """
      self.table_record = BlastTableRec()
      self._n += 1
      inline = self._lookahead
      if not inline:
         return None
      while inline:
         # Blank lines carry no data; passing one to _consume_entry would
         # fail on the empty column list, so they are skipped.
         if inline.strip():
            if inline[0] == '#':
               if self._in_header:
                  self._in_header = self._consume_header(inline)
               else:
                  # A '#' line after the entries opens the next record.
                  break
            else:
               self._consume_entry(inline)
               self._in_header = 0

         inline = self.handle.readline()
      # Keep the line that ended this record for the following call.
      self._lookahead = inline
      self._in_header = 1
      return self.table_record

   def _consume_entry(self, inline):
      """Parse one entry line and add it to the current record."""
      current_entry = BlastTableEntry(inline)
      self.table_record.add_entry(current_entry)
   def _consume_header(self, inline):
      """Dispatch one '#' header line to its _parse_* handler.

      Returns the handler's result: 1 while more header lines are expected,
      0 once the header is finished. A line containing none of the
      reader_keywords is ignored and 1 is returned.
      """
      # Pick the keyword that occurs earliest in the line rather than the
      # first one the dict happens to yield: the '# Fields: Query id, ...'
      # line also contains 'Query' and must not be parsed as the query.
      matched = None
      matched_pos = -1
      for keyword in reader_keywords:
         pos = inline.find(keyword)
         if pos > -1 and (matched is None or pos < matched_pos):
            matched, matched_pos = keyword, pos
      if matched is None:
         # Unrecognised comment line: stay in the header.
         return 1
      return self._Parse('_parse_%s' % reader_keywords[matched], inline)
   def _parse_version(self, inline):
      """Store program, version and date from a '# BLASTP ...' line.

      Expects exactly three whitespace-separated tokens after the first
      one; otherwise the unpacking raises ValueError.
      """
      program, version, date = inline.split()[1:]
      self.table_record.program = program
      self.table_record.version = version
      self.table_record.date = date
      return 1
   def _parse_iteration(self, inline):
      """Store the third token of the line as the integer iteration."""
      # int() replaces string.atoi(), which no longer exists in Python 3.
      self.table_record.iteration = int(inline.split()[2])
      return 1
   def _parse_query(self, inline):
      """Store every token from the third onwards as the query (a list)."""
      self.table_record.query = inline.split()[2:]
      return 1
   def _parse_database(self, inline):
      """Store the third token of the line as the database name."""
      self.table_record.database = inline.split()[2]
      return 1
   def _parse_fields(self, inline):
      """Mark the end of the header; the line itself is not stored."""
      return 0
   def _Parse(self, method_name, inline):
      """Call the method named method_name on self with inline."""
      return getattr(self,method_name)(inline)