File: xml.py

package info (click to toggle)
python-scrapy 0.14.4-1
  • links: PTS, VCS
  • area: main
  • in suites: wheezy
  • size: 3,064 kB
  • sloc: python: 19,468; xml: 199; sh: 134; makefile: 67
file content (24 lines) | stat: -rw-r--r-- 743 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
"""
This module implements the XmlResponse class, which adds encoding discovery
through XML encoding declarations to the TextResponse class.

See documentation in docs/topics/request-response.rst
"""

import re

from scrapy.http.response.text import TextResponse
from scrapy.utils.python import memoizemethod_noargs

class XmlResponse(TextResponse):
    """TextResponse subclass that reads its encoding from an XML declaration.

    The encoding is taken from the ``encoding`` pseudo-attribute of an
    ``<?xml ... ?>`` declaration found in the leading part of the body.
    """

    # ``name = value`` where the value may be wrapped in single or double
    # quotes and surrounded by optional whitespace.
    _template = r'''%s\s*=\s*["']?\s*%s\s*["']?'''
    _encoding_re = _template % ('encoding', r'(?P<charset>[\w-]+)')
    # ``[^>]*?`` (instead of ``.*?``) keeps the match inside the declaration:
    # it cannot run past the closing ``?>`` and pick up an ``encoding``
    # attribute of a later element, and, unlike ``.``, it also matches
    # newlines, so a declaration split across lines is still recognised.
    XMLDECL_RE = re.compile(r'<\?xml\s[^>]*?%s' % _encoding_re, re.I)

    @memoizemethod_noargs
    def _body_declared_encoding(self):
        """Return the encoding named in the body's XML declaration.

        Returns the matched ``charset`` group, or ``None`` when no XML
        declaration with an ``encoding`` pseudo-attribute is found.
        """
        # Only the first 5000 items of the body are scanned.
        chunk = self.body[:5000]
        match = self.XMLDECL_RE.search(chunk)
        return match.group('charset') if match else None