File: test_nested_loader.py

package info (click to toggle)
python-itemloaders 1.4.0-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 312 kB
  • sloc: python: 1,497; makefile: 78
file content (116 lines) | stat: -rw-r--r-- 4,125 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
from parsel import Selector

from itemloaders import ItemLoader


class TestSubselectorLoader:
    """Tests for loaders created with ``nested_xpath`` / ``nested_css``.

    Every test builds a fresh root ``ItemLoader`` over the shared ``selector``
    fixture and checks how values added through nested loaders show up on the
    root loader.
    """

    # Small HTML document shared (read-only) by all tests in this class.
    selector = Selector(
        text="""
    <html>
    <body>
    <header>
      <div id="id">marta</div>
      <p>paragraph</p>
    </header>
    <footer class="footer">
      <a href="http://www.scrapy.org">homepage</a>
      <img src="/images/logo.png" width="244" height="65" alt="Scrapy">
    </footer>
    </body>
    </html>
    """
    )

    def _fill_and_check_header(self, root, header):
        """Populate ``header`` (a loader nested on ``<header>``) and verify it.

        Adds the same three fields via XPath, CSS and a literal value, then
        asserts the expected outputs on ``root`` and that ``root`` and
        ``header`` report identical outputs for each field.
        """
        header.add_xpath("name", "div/text()")
        header.add_css("name_div", "#id")
        assert header.selector
        div_texts = header.selector.xpath('div[@id = "id"]/text()').getall()
        header.add_value("name_value", div_texts)

        expected = {
            "name": ["marta"],
            "name_div": ['<div id="id">marta</div>'],
            "name_value": ["marta"],
        }
        for field, wanted in expected.items():
            assert root.get_output_value(field) == wanted

        for field in expected:
            assert root.get_output_value(field) == header.get_output_value(field)

    def test_nested_xpath(self):
        """A loader nested by XPath resolves relative expressions on ``<header>``."""
        root = ItemLoader(selector=self.selector)
        self._fill_and_check_header(root, root.nested_xpath("//header"))

    def test_nested_css(self):
        """A loader nested by CSS behaves the same as the XPath-nested one."""
        root = ItemLoader(selector=self.selector)
        self._fill_and_check_header(root, root.nested_css("header"))

    def test_nested_replace(self):
        """``replace_xpath`` on any nesting level overwrites the root's value."""
        root = ItemLoader(selector=self.selector)
        footer = root.nested_xpath("//footer")
        anchor = footer.nested_xpath("a")

        homepage = ["http://www.scrapy.org"]
        logo = ["/images/logo.png"]

        root.add_xpath("url", "//footer/a/@href")
        assert root.get_output_value("url") == homepage

        # Replacing through the first-level nested loader.
        footer.replace_xpath("url", "img/@src")
        assert root.get_output_value("url") == logo

        # Replacing through the second-level nested loader.
        anchor.replace_xpath("url", "@href")
        assert root.get_output_value("url") == homepage

    def test_nested_ordering(self):
        """Values are collected in call order, whichever loader added them."""
        root = ItemLoader(selector=self.selector)
        footer = root.nested_xpath("//footer")
        anchor = footer.nested_xpath("a")

        footer.add_xpath("url", "img/@src")
        root.add_xpath("url", "//footer/a/@href")
        anchor.add_xpath("url", "text()")
        root.add_xpath("url", "//footer/a/@href")

        collected = root.get_output_value("url")
        assert collected == [
            "/images/logo.png",
            "http://www.scrapy.org",
            "homepage",
            "http://www.scrapy.org",
        ]

    def test_nested_load_item(self):
        """``load_item`` on the root returns the item shared by nested loaders."""
        root = ItemLoader(selector=self.selector)
        footer = root.nested_xpath("//footer")
        image = footer.nested_xpath("img")

        root.add_xpath("name", "//header/div/text()")
        footer.add_xpath("url", "a/@href")
        image.add_xpath("image", "@src")

        item = root.load_item()

        # Root and both nesting levels must expose the very same item object.
        for holder in (root, footer, image):
            assert item is holder.item

        expected = {
            "name": ["marta"],
            "url": ["http://www.scrapy.org"],
            "image": ["/images/logo.png"],
        }
        for field, wanted in expected.items():
            assert item[field] == wanted

    def test_nested_empty_selector(self):
        """Nesting on a non-matching expression still yields a usable loader."""
        root = ItemLoader(selector=self.selector)

        # The fixture contains no <bar> element, so both selections are empty.
        via_xpath = root.nested_xpath("//bar")
        assert isinstance(via_xpath, ItemLoader)
        via_xpath.add_xpath("foo", "./foo")

        via_css = root.nested_css("bar")
        assert isinstance(via_css, ItemLoader)
        via_css.add_css("foo", "foo")