File: test_mixins.py

package info (click to toggle)
python-web-poet 0.23.2-1
  • links: PTS, VCS
  • area: main
  • in suites:
  • size: 908 kB
  • sloc: python: 6,112; makefile: 19
file content (85 lines) | stat: -rw-r--r-- 2,422 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
import parsel
import pytest
from packaging import version

from web_poet.mixins import ResponseShortcutsMixin
from web_poet.page_inputs import HttpResponse

# Parsed version of the installed parsel; if the module exposes no
# ``__version__`` attribute, "0.0" is used instead.
PARSEL_VERSION = version.parse(getattr(parsel, "__version__", "0.0"))
# True when parsel is at least 1.8.0. The jmespath tests in this module use
# this flag in their ``skipif`` conditions.
PARSEL_18_PLUS = PARSEL_VERSION >= version.parse("1.8.0")  # noqa: SIM300


class MyPage(ResponseShortcutsMixin):
    """Minimal ``ResponseShortcutsMixin`` subclass used by the tests below.

    It only stores the given response on ``self.response``; everything else
    the tests access on it (``url``, ``html``, ``xpath``, ...) is not defined
    here.
    """

    def __init__(self, response: HttpResponse):
        self.response = response


@pytest.fixture
def my_page(book_list_html_response):
    """Return a ``MyPage`` built from the ``book_list_html_response`` fixture."""
    return MyPage(response=book_list_html_response)


@pytest.fixture
def my_json_page(some_json_response):
    """Return a ``MyPage`` built from the ``some_json_response`` fixture."""
    return MyPage(response=some_json_response)


def test_url(my_page) -> None:
    """``url`` on the page must equal the expected book-list index URL."""
    expected_url = "http://books.toscrape.com/index.html"
    assert my_page.url == expected_url


def test_html(my_page, book_list_html) -> None:
    """``html`` on the page must equal the ``book_list_html`` fixture value."""
    page_html = my_page.html
    assert page_html == book_list_html


def test_xpath(my_page) -> None:
    """An XPath query for the title text yields the expected page title."""
    raw_title = my_page.xpath(".//title/text()").get()
    # Surrounding whitespace is stripped before comparing.
    assert raw_title.strip() == "All products | Books to Scrape - Sandbox"


@pytest.mark.skipif(not PARSEL_18_PLUS, reason="parsel < 1.8 doesn't support jmespath")
def test_jmespath(my_json_page) -> None:
    """``jmespath`` works on both the page and the response it holds."""
    targets = (my_json_page, my_json_page.response)
    for target in targets:
        assert target.jmespath("website.name").get() == "homepage"


@pytest.mark.skipif(PARSEL_18_PLUS, reason="parsel >= 1.8 supports jmespath")
def test_jmespath_not_available(my_json_page) -> None:
    """On parsel older than 1.8 the jmespath query raises ``AttributeError``.

    Checked for both the page and the response it holds.
    """
    targets = (my_json_page, my_json_page.response)
    for target in targets:
        with pytest.raises(AttributeError):
            target.jmespath("website.name").get()


def test_css(my_page) -> None:
    """A CSS query for the title text yields the expected page title."""
    raw_title = my_page.css("title::text").get()
    # Surrounding whitespace is stripped before comparing.
    assert raw_title.strip() == "All products | Books to Scrape - Sandbox"


def test_baseurl(my_page) -> None:
    """``base_url`` on the book-list page must equal the expected index URL."""
    expected_base = "http://books.toscrape.com/index.html"
    assert my_page.base_url == expected_base


def test_urljoin(my_page) -> None:
    """``urljoin`` with a relative reference yields the expected absolute URL."""
    joined = my_page.urljoin("foo")
    assert joined == "http://books.toscrape.com/foo"


def test_custom_baseurl() -> None:
    """Check ``url``, ``base_url`` and ``urljoin`` for a body with ``<base href>``.

    The response URL and the ``<base href>`` value differ, so the assertions
    show which of the two each shortcut reports.
    """
    # NOTE(review): the second ``<body>`` looks like a typo for ``</body>``;
    # it is kept unchanged so the test input stays exactly as it was.
    html_with_base = b"""
    <html>
    <head>
        <base href="http://example.com/foo/">
    </head>
    <body><body>
    </html>
    """
    page = MyPage(
        response=HttpResponse(
            url="http://www.example.com/path",
            body=html_with_base,
        )
    )

    assert page.url == "http://www.example.com/path"
    assert page.base_url == "http://example.com/foo/"

    # Reference passed to ``urljoin`` -> expected result.
    expected_joins = {
        "bar": "http://example.com/foo/bar",
        "http://example.com/1": "http://example.com/1",
    }
    for reference, expected in expected_joins.items():
        assert page.urljoin(reference) == expected