"""
This module implements the XmlResponse class, which adds encoding
discovery through XML encoding declarations to the TextResponse class.

See documentation in docs/topics/request-response.rst
"""
import re
from scrapy.http.response.text import TextResponse
from scrapy.utils.python import memoizemethod_noargs
class XmlResponse(TextResponse):
    """Text response that honours the encoding named in an XML declaration.

    When the start of the body carries ``<?xml ... encoding="..."?>``, the
    charset named there is returned by ``body_encoding()``; otherwise the
    value computed by ``TextResponse.body_encoding()`` is used.
    """

    __slots__ = ()

    # ``name = value`` pseudo-attribute; the quotes around the value are
    # optional and whitespace is tolerated around ``=`` and the value.
    _template = r'''%s\s*=\s*["']?\s*%s\s*["']?'''
    _encoding_re = _template % ('encoding', r'(?P<charset>[\w-]+)')
    # ``[^>]*?`` (instead of ``.*?``) keeps the search inside the declaration:
    # it can cross line breaks between pseudo-attributes, but it can never run
    # past the closing ``?>`` and pick up an ``encoding`` attribute belonging
    # to a later element on the same line.
    XMLDECL_RE = re.compile(r'<\?xml\s[^>]*?%s' % _encoding_re, re.I)

    def body_encoding(self):
        """Return the declared XML encoding, else the inherited body encoding."""
        return self._body_declared_encoding() or super(XmlResponse, self).body_encoding()

    @memoizemethod_noargs
    def _body_declared_encoding(self):
        """Return the charset named in the XML declaration, or None.

        Only the first 5000 items of ``self.body`` are searched.
        NOTE(review): ``memoizemethod_noargs`` presumably caches the result
        per instance -- confirm against scrapy.utils.python.
        """
        chunk = self.body[:5000]
        match = self.XMLDECL_RE.search(chunk)
        return match.group('charset') if match else None
|