File: KeyWList.py

package info (click to toggle)
python-biopython 1.42-2
  • links: PTS
  • area: main
  • in suites: etch, etch-m68k
  • size: 17,584 kB
  • ctags: 12,272
  • sloc: python: 80,461; xml: 13,834; ansic: 7,902; cpp: 1,855; sql: 1,144; makefile: 203
file content (147 lines) | stat: -rw-r--r-- 4,615 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
# Copyright 1999 by Jeffrey Chang.  All rights reserved.
# This code is part of the Biopython distribution and governed by its
# license.  Please see the LICENSE file that should have been included
# as part of this package.

"""
This module provides code to work with the keywlist.txt file from
SwissProt.
http://www.expasy.ch/sprot/sprot-top.html


Classes:
ListParser        Parses a keywlist.txt file into a list of keywords.

_Scanner          Scans the keywlist.txt file.
_ListConsumer     Consumes keywlist data to a list.


Functions:
extract_keywords  Return the keywords from a keywlist.txt file.

"""

from types import *

from Bio import File
from Bio.ParserSupport import *

class ListParser(AbstractParser):
    """Parser that turns keywlist.txt data into a list of keywords.

    One _Scanner and one _ListConsumer are created per parser and are
    reused by every call to parse.

    """
    def __init__(self):
        self._consumer = _ListConsumer()
        self._scanner = _Scanner()

    def parse(self, handle):
        """parse(self, handle) -> list of keywords

        Feed handle through the scanner and return the keywords that the
        consumer collected.

        """
        consumer = self._consumer
        self._scanner.feed(handle, consumer)
        return consumer.keywords


class _Scanner:
    """Scanner for the keywlist.txt file shipped with the SwissProt
    distribution.

    The file is scanned as three consecutive sections: header, keywords
    and footer.  The start and end of each section are reported to the
    consumer.

    Tested with:
    Release 37
    Release 38
    """

    def feed(self, handle, consumer):
        """feed(self, handle, consumer)

        Scan a keywlist.txt file.  handle is a file-like object holding
        the keyword information; it is wrapped in a File.UndoHandle
        unless it already is one.  consumer is a Consumer object that
        receives the events generated while scanning.

        """
        uhandle = handle
        if not isinstance(uhandle, File.UndoHandle):
            uhandle = File.UndoHandle(handle)

        # The sections have to be scanned in file order.
        for scan_section in (self._scan_header,
                             self._scan_keywords,
                             self._scan_footer):
            scan_section(uhandle, consumer)

    def _scan_header(self, uhandle, consumer):
        # Conditions for the fixed lines of the header, in file order.
        # Each dictionary holds the keyword arguments of one read_and_call.
        banner = (
            {'start': '----'},
            {'blank': 1},
            {'contains': "SWISS-PROT"},
            {'contains': "Release"},
            {'blank': 1},
            {'start': '----'},
            {'blank': 1},
            {'start': 'List of keywords'},
            {'blank': 1},
            {'start': '----'},
        )
        trailer = (
            {'start': 'Document name'},
            {'start': '----'},
            {'blank': 1},
        )

        consumer.start_header()

        for expected in banner:
            read_and_call(uhandle, consumer.noevent, **expected)

        # Variable-length part: keep reading lines until the next line
        # starting with '----' has been consumed.
        while not attempt_read_and_call(uhandle, consumer.noevent,
                                        start='----'):
            read_and_call(uhandle, consumer.noevent, blank=0)

        for expected in trailer:
            read_and_call(uhandle, consumer.noevent, **expected)

        consumer.end_header()

    def _scan_keywords(self, uhandle, consumer):
        consumer.start_keywords()

        # SwissProt38 starts with lines:
        # Keyword
        # ______________________________________
        #
        # Check and see if it's release 38, and parse it.
        has_title = attempt_read_and_call(uhandle, consumer.noevent,
                                          start='Keyword')
        if has_title:
            read_and_call(uhandle, consumer.noevent, start='____')

        # Hand lines to consumer.keyword for as long as the blank=0
        # condition holds; the loop body is intentionally empty.
        while attempt_read_and_call(uhandle, consumer.keyword, blank=0):
            pass
        read_and_call(uhandle, consumer.noevent, blank=1)

        consumer.end_keywords()

    def _scan_footer(self, uhandle, consumer):
        consumer.start_footer()

        read_and_call(uhandle, consumer.noevent, start='----')
        # Everything up to the closing '----' line is passed to
        # consumer.copyright.
        while not attempt_read_and_call(uhandle, consumer.noevent,
                                        start='----'):
            read_and_call(uhandle, consumer.copyright, blank=0)

        consumer.end_footer()

class _ListConsumer(AbstractConsumer):
    """Consumer that converts a keywlist.txt file into a list of keywords.

    Only the start_keywords and keyword events are defined here.

    Members:
    keywords    List of keywords.  None until start_keywords is called.

    """
    def __init__(self):
        self.keywords = None

    def start_keywords(self):
        """start_keywords(self)

        Begin a new, empty keyword list, replacing any previous one.

        """
        self.keywords = []

    def keyword(self, line):
        """keyword(self, line)

        Append line, with trailing whitespace removed, to the keyword list.

        """
        # Use the str method rather than string.rstrip(line): this module
        # never imports string itself, so the function form only worked
        # when the name happened to leak in through a star-import.
        self.keywords.append(line.rstrip())

def extract_keywords(keywlist_handle):
    """extract_keywords(keywlist_handle) -> list of keywords

    Return the keywords from a keywlist.txt file.

    keywlist_handle is an open file or a file-like object.  A ValueError
    is raised if it does not look like one (for example, if a filename
    string is passed instead of a handle).

    """
    # Test for the file protocol instead of comparing exact types: a check
    # against FileType/InstanceType turns away file-like objects that are
    # neither, such as cStringIO objects or instances of new-style classes.
    # NOTE(review): readline is assumed to be the method the scanner ends
    # up calling through File.UndoHandle -- confirm against Bio.File.
    if not callable(getattr(keywlist_handle, 'readline', None)):
        raise ValueError("I expected a file handle or file-like object")
    return ListParser().parse(keywlist_handle)