File: test_selector_lxmldocument.py

package info (click to toggle)

python-scrapy 0.24.2-1

links: PTS, VCS
area: main
in suites: jessie, jessie-kfreebsd
size: 3,240 kB
ctags: 4,259
sloc: python: 21,170; xml: 199; makefile: 67; sh: 44

file content (26 lines) | stat: -rw-r--r-- 910 bytes

parent folder | download | duplicates (2)

import unittest
from scrapy.selector.lxmldocument import LxmlDocument
from scrapy.http import TextResponse, HtmlResponse


class LxmlDocumentTest(unittest.TestCase):
    """Tests for building ``LxmlDocument`` objects from response objects."""

    def test_caching(self):
        """Check that the document built for a response is reused.

        Two ``LxmlDocument`` calls with the same response must return the
        very same object, while a copy of that response must get a
        different one.
        """
        r1 = HtmlResponse('http://www.example.com',
                          body=b'<html><head></head><body></body></html>')
        r2 = r1.copy()

        doc1 = LxmlDocument(r1)
        doc2 = LxmlDocument(r1)
        doc3 = LxmlDocument(r2)

        # make sure it's cached; unittest assertions are used instead of bare
        # ``assert`` so the checks survive ``python -O`` and report both
        # operands on failure
        self.assertIs(doc1, doc2)
        self.assertIsNot(doc1, doc3)

    def test_null_char(self):
        """Check that a body containing a NUL character can be parsed.

        The test passes as long as constructing the document does not raise.
        """
        # make sure bodies with null char ('\x00') don't raise a TypeError exception
        # (bytes literal: the body is the raw payload, decoded via the charset header)
        body = b'test problematic \x00 body'
        response = TextResponse('http://example.com/catalog/product/blabla-123',
                                headers={'Content-Type': 'text/plain; charset=utf-8'},
                                body=body)
        LxmlDocument(response)