File: test_contrib_exp_loader_lxmlloader.py

package info (click to toggle)
python-scrapy 0.8-3
  • links: PTS, VCS
  • area: main
  • in suites: squeeze
  • size: 2,904 kB
  • ctags: 2,981
  • sloc: python: 15,349; xml: 199; makefile: 68; sql: 64; sh: 34
file content (67 lines) | stat: -rw-r--r-- 2,111 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
from twisted.trial import unittest

from scrapy.contrib.loader.processor import MapCompose
from scrapy.item import Item, Field
from scrapy.http import HtmlResponse

# lxml is an optional dependency: when it cannot be imported, bind the name
# to a falsy sentinel so this module still imports and later `if lxml:` /
# `if not lxml:` checks work.
try:
    import lxml
except ImportError:
    lxml = False


class TestItem(Item):
    """Minimal item with a single ``name`` field, used by the loader tests."""
    name = Field()


# Import and subclass the lxml-based loader only when lxml was importable;
# otherwise TestLxmlItemLoader is left undefined.
if lxml:
    from scrapy.contrib_exp.loader.lxmlloader import LxmlItemLoader

    class TestLxmlItemLoader(LxmlItemLoader):
        """LxmlItemLoader subclass whose default item class is TestItem."""
        default_item_class = TestItem


class LxmlItemLoaderTest(unittest.TestCase):
    """Check TestLxmlItemLoader's XPath and CSS helpers against a fixed page.

    Each test builds a fresh loader over the shared ``response`` fixture and
    compares ``get_output_value('name')`` with the expected list after values
    have been added or replaced.  Every test is skipped when lxml could not
    be imported.
    """

    # Shared fixture: a tiny document with one <div id="id"> and one <p>.
    response = HtmlResponse(
        url="",
        body='<html><body><div id="id">marta</div><p>paragraph</p></body></html>',
    )

    def setUp(self):
        # TestLxmlItemLoader only exists when lxml was imported, so skip
        # rather than let each test die with a NameError.
        if not lxml:
            raise unittest.SkipTest("lxml is not available")

    def test_constructor_with_response(self):
        # Building the loader from a response must leave a truthy ``tree``.
        loader = TestLxmlItemLoader(response=self.response)
        # assertTrue replaces the deprecated ``assert_`` alias; same check.
        self.assertTrue(loader.tree)

    def test_add_xpath(self):
        # Selecting an element node yields its serialized markup.
        loader = TestLxmlItemLoader(response=self.response)
        loader.add_xpath('name', '//div')
        self.assertEqual(loader.get_output_value('name'), [u'<div id="id">marta</div>'])

    def test_add_xpath_text(self):
        # Selecting a text node yields the bare text.
        loader = TestLxmlItemLoader(response=self.response)
        loader.add_xpath('name', '//div/text()')
        self.assertEqual(loader.get_output_value('name'), [u'marta'])

    def test_replace_xpath(self):
        # replace_xpath must discard the previously collected value.
        loader = TestLxmlItemLoader(response=self.response)
        loader.add_xpath('name', '//div/text()')
        self.assertEqual(loader.get_output_value('name'), [u'marta'])
        loader.replace_xpath('name', '//p/text()')
        self.assertEqual(loader.get_output_value('name'), [u'paragraph'])

    def test_add_css(self):
        # A CSS id selector yields the matched element's serialized markup.
        loader = TestLxmlItemLoader(response=self.response)
        loader.add_css('name', '#id')
        self.assertEqual(loader.get_output_value('name'), [u'<div id="id">marta</div>'])

    def test_replace_css(self):
        # replace_css must discard the previously collected value.
        loader = TestLxmlItemLoader(response=self.response)
        loader.add_css('name', '#id')
        self.assertEqual(loader.get_output_value('name'), [u'<div id="id">marta</div>'])
        loader.replace_css('name', 'p')
        self.assertEqual(loader.get_output_value('name'), [u'<p>paragraph</p>'])


# Allow running this module directly as a script.
# NOTE(review): `unittest` here is twisted.trial.unittest, not the stdlib
# module -- confirm it actually exposes main(); otherwise run via `trial`.
if __name__ == "__main__":
    unittest.main()