File: ScanProsite.py

package info (click to toggle)
python-biopython 1.73%2Bdfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: buster
  • size: 57,852 kB
  • sloc: python: 169,977; xml: 97,539; ansic: 15,653; sql: 1,208; makefile: 159; sh: 63
file content (131 lines) | stat: -rw-r--r-- 4,430 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
# Copyright 2009 by Michiel de Hoon. All rights reserved.
# This code is part of the Biopython distribution and governed by its
# license. Please see the LICENSE file that should have been included
# as part of this package.

# Importing these functions with leading underscore as not intended for reuse
from Bio._py3k import urlopen as _urlopen
from Bio._py3k import urlencode as _urlencode

from xml.sax import handler
from xml.sax.expatreader import ExpatParser


class Record(list):
    """Represents search results returned by ScanProsite.

    This record is a list containing the search results returned by
    ScanProsite. The record also contains the data members n_match,
    n_seq, capped, and warning.
    """

    def __init__(self):
        """Initialize the class."""
        self.n_match = None
        self.n_seq = None
        self.capped = None
        self.warning = None


def scan(seq="", mirror='https://www.expasy.org', output='xml', **keywords):
    """Execute a ScanProsite search.

    Arguments:
     - mirror:   The ScanProsite mirror to be used
                 (default: https://www.expasy.org).
     - seq:      The query sequence, or UniProtKB (Swiss-Prot,
                 TrEMBL) accession
     - output:   Format of the search results
                 (default: xml)

    Further search parameters can be passed as keywords; see the
    documentation for programmatic access to ScanProsite at
    https://www.expasy.org/tools/scanprosite/ScanPrositeREST.html
    for a description of such parameters.

    This function returns a handle to the search results returned by
    ScanProsite. Search results in the XML format can be parsed into a
    Python object, by using the Bio.ExPASy.ScanProsite.read function.
    """
    parameters = {'seq': seq,
                  'output': output}
    for key, value in keywords.items():
        if value is not None:
            parameters[key] = value
    command = _urlencode(parameters)
    url = "%s/cgi-bin/prosite/PSScan.cgi?%s" % (mirror, command)
    handle = _urlopen(url)
    return handle


def read(handle):
    """Parse search results returned by ScanProsite into a Python object."""
    content_handler = ContentHandler()
    saxparser = Parser()
    saxparser.setContentHandler(content_handler)
    saxparser.parse(handle)
    record = content_handler.record
    return record

# The functions below are considered private


class Parser(ExpatParser):

    def __init__(self):
        """Initialize the class."""
        ExpatParser.__init__(self)
        self.firsttime = True

    def feed(self, data, isFinal=0):
        # Error messages returned by the ScanProsite server are formatted as
        # as plain text instead of an XML document. To catch such error
        # messages, we override the feed method of the Expat parser.
        # The error message is (hopefully) contained in the data that was just
        # fed to the parser.
        if self.firsttime:
            if data[:5].decode('utf-8') != "<?xml":
                raise ValueError(data)
        self.firsttime = False
        return ExpatParser.feed(self, data, isFinal)


class ContentHandler(handler.ContentHandler):
    integers = ("start", "stop")
    strings = ("sequence_ac",
               "sequence_id",
               "sequence_db",
               "signature_ac",
               "level",
               "level_tag")

    def __init__(self):
        """Initialize the class."""
        self.element = []

    def startElement(self, name, attrs):
        self.element.append(name)
        self.content = ""
        if self.element == ["matchset"]:
            self.record = Record()
            self.record.n_match = int(attrs["n_match"])
            self.record.n_seq = int(attrs["n_seq"])
        elif self.element == ["matchset", "match"]:
            match = {}
            self.record.append(match)

    def endElement(self, name):
        assert name == self.element.pop()
        name = str(name)
        if self.element == ["matchset", "match"]:
            match = self.record[-1]
            if name in ContentHandler.integers:
                match[name] = int(self.content)
            elif name in ContentHandler.strings:
                match[name] = self.content
            else:
                # Unknown type, treat it as a string
                match[name] = self.content

    def characters(self, content):
        self.content += content