1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131
|
# Copyright 2009 by Michiel de Hoon. All rights reserved.
# This code is part of the Biopython distribution and governed by its
# license. Please see the LICENSE file that should have been included
# as part of this package.
# Importing these functions with leading underscore as not intended for reuse
from Bio._py3k import urlopen as _urlopen
from Bio._py3k import urlencode as _urlencode
from xml.sax import handler
from xml.sax.expatreader import ExpatParser
class Record(list):
    """Container for the search results returned by ScanProsite.

    The record is a plain list to which the parser appends one entry per
    match. In addition it carries four data members, all of which start
    out as None:

     - n_match
     - n_seq
     - capped
     - warning
    """

    def __init__(self):
        """Create an empty record with every data member unset."""
        self.n_match = self.n_seq = self.capped = self.warning = None
def scan(seq="", mirror='https://www.expasy.org', output='xml', **keywords):
    """Run a ScanProsite search and return a handle to the raw results.

    Arguments:
     - seq:    The query sequence, or a UniProtKB (Swiss-Prot, TrEMBL)
               accession.
     - mirror: The ScanProsite mirror to send the request to
               (default: https://www.expasy.org).
     - output: Format of the search results (default: xml).

    Any additional keyword arguments are forwarded as search parameters,
    except those whose value is None, which are left out of the request.
    See the documentation for programmatic access to ScanProsite at
    https://www.expasy.org/tools/scanprosite/ScanPrositeREST.html
    for a description of the available parameters.

    The returned handle gives access to the server response. Results in
    the XML format can be turned into a Python object with the
    Bio.ExPASy.ScanProsite.read function.
    """
    query = dict(seq=seq, output=output)
    # Optional parameters are only sent when the caller gave them a value.
    query.update((name, setting)
                 for name, setting in keywords.items()
                 if setting is not None)
    encoded = _urlencode(query)
    return _urlopen("%s/cgi-bin/prosite/PSScan.cgi?%s" % (mirror, encoded))
def read(handle):
    """Parse ScanProsite XML search results from handle into a Record."""
    sax = Parser()
    collector = ContentHandler()
    sax.setContentHandler(collector)
    sax.parse(handle)
    # The content handler creates the record when it sees the root element.
    return collector.record
# The classes below are considered private
class Parser(ExpatParser):
    """Expat SAX parser that rejects responses that are not XML documents.

    The first chunk of data fed to the parser must start with an XML
    declaration ("<?xml"); otherwise a ValueError carrying that chunk is
    raised instead of handing the data to Expat.
    """

    def __init__(self):
        """Initialize the class."""
        ExpatParser.__init__(self)
        # True until the first chunk of data has been inspected by feed().
        self.firsttime = True

    def feed(self, data, isFinal=0):
        """Check the first chunk for an XML declaration, then feed Expat.

        Arguments:
         - data:    A chunk of the document, either bytes (byte stream)
                    or text (character stream).
         - isFinal: Passed through unchanged to ExpatParser.feed.

        Raises ValueError, with the offending chunk as its argument, if
        the very first chunk does not start with "<?xml".
        """
        # Error messages returned by the ScanProsite server are formatted
        # as plain text instead of an XML document. To catch such error
        # messages, we override the feed method of the Expat parser.
        # The error message is (hopefully) contained in the data that was just
        # fed to the parser.
        if self.firsttime:
            # Compare like with like: the chunk is bytes when reading from a
            # binary handle and text when reading from a text handle, and
            # text has no decode() method on Python 3.
            if isinstance(data, (bytes, bytearray)):
                declaration = b"<?xml"
            else:
                declaration = "<?xml"
            if data[:5] != declaration:
                raise ValueError(data)
            self.firsttime = False
        return ExpatParser.feed(self, data, isFinal)
class ContentHandler(handler.ContentHandler):
    """SAX content handler that collects ScanProsite matches into a Record.

    A Record is created when the root <matchset> element is opened, one
    dictionary is appended to it for every <matchset>/<match> element, and
    the text of each direct child of <match> is stored in that dictionary
    under the child's element name.
    """

    # Children of <match> whose text is converted to int.
    integers = ("start", "stop")
    # Children of <match> known to hold plain text. Any element not listed
    # in `integers` is stored as text, so this tuple is informational.
    strings = ("sequence_ac",
               "sequence_id",
               "sequence_db",
               "signature_ac",
               "level",
               "level_tag")

    def __init__(self):
        """Initialize the class."""
        handler.ContentHandler.__init__(self)
        # Stack of the names of the currently open elements.
        self.element = []
        # Text accumulated since the most recent start tag.
        self.content = ""

    def startElement(self, name, attrs):
        """Push the element on the stack and start a record or a match."""
        self.element.append(name)
        self.content = ""
        if self.element == ["matchset"]:
            # Root element: its attributes carry the overall counts.
            self.record = Record()
            self.record.n_match = int(attrs["n_match"])
            self.record.n_seq = int(attrs["n_seq"])
        elif self.element == ["matchset", "match"]:
            match = {}
            self.record.append(match)

    def endElement(self, name):
        """Pop the element and store its text if it is a child of <match>."""
        # The pop must happen outside the assert statement: asserts are
        # removed under "python -O", which would leave the stack untouched.
        closed = self.element.pop()
        assert name == closed
        name = str(name)
        if self.element == ["matchset", "match"]:
            match = self.record[-1]
            if name in ContentHandler.integers:
                match[name] = int(self.content)
            else:
                # Known string fields and unknown fields alike are kept
                # as text.
                match[name] = self.content

    def characters(self, content):
        """Append a chunk of character data to the current element text."""
        self.content += content
|