1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85
|
import parsel
import pytest
from packaging import version
from web_poet.mixins import ResponseShortcutsMixin
from web_poet.page_inputs import HttpResponse
# Version of the installed parsel; "0.0" is used when the module exposes no
# ``__version__`` attribute.
PARSEL_VERSION = version.parse(
    getattr(parsel, "__version__", "0.0"),
)
# True when parsel is at least 1.8.0; the jmespath tests in this module are
# skipped or enabled based on this flag.
PARSEL_18_PLUS = PARSEL_VERSION >= version.parse("1.8.0")  # noqa: SIM300
class MyPage(ResponseShortcutsMixin):
    """Bare-bones page object used to exercise ``ResponseShortcutsMixin``.

    The class itself only stores the response on ``self.response``; every
    other attribute the tests access is inherited from the mixin.
    """

    def __init__(self, response: HttpResponse) -> None:
        """Keep ``response`` on the instance as ``self.response``."""
        self.response = response
@pytest.fixture
def my_page(book_list_html_response):
    """Provide a ``MyPage`` wrapping the ``book_list_html_response`` fixture."""
    page = MyPage(book_list_html_response)
    return page
@pytest.fixture
def my_json_page(some_json_response):
    """Provide a ``MyPage`` wrapping the ``some_json_response`` fixture."""
    page = MyPage(some_json_response)
    return page
def test_url(my_page) -> None:
    """``url`` on the page equals the expected book-list address."""
    page_url = my_page.url
    assert page_url == "http://books.toscrape.com/index.html"
def test_html(my_page, book_list_html) -> None:
    """``html`` on the page equals the ``book_list_html`` fixture value."""
    page_html = my_page.html
    assert page_html == book_list_html
def test_xpath(my_page) -> None:
    """The ``xpath`` shortcut can extract the document title text."""
    matches = my_page.xpath(".//title/text()")
    first_match = matches.get()
    # Surrounding whitespace is stripped before comparing.
    title = first_match.strip()
    assert title == "All products | Books to Scrape - Sandbox"
@pytest.mark.skipif(not PARSEL_18_PLUS, reason="parsel < 1.8 doesn't support jmespath")
def test_jmespath(my_json_page) -> None:
    """``jmespath`` works on both the page object and its response."""
    targets = (my_json_page, my_json_page.response)
    for target in targets:
        result = target.jmespath("website.name")
        name = result.get()
        assert name == "homepage"
@pytest.mark.skipif(PARSEL_18_PLUS, reason="parsel >= 1.8 supports jmespath")
def test_jmespath_not_available(my_json_page) -> None:
    """On older parsel, a ``jmespath`` query raises ``AttributeError``.

    The check is applied to both the page object and its response.
    """
    targets = (my_json_page, my_json_page.response)
    for target in targets:
        with pytest.raises(AttributeError):
            target.jmespath("website.name").get()
def test_css(my_page) -> None:
    """The ``css`` shortcut can extract the document title text."""
    matches = my_page.css("title::text")
    first_match = matches.get()
    # Surrounding whitespace is stripped before comparing.
    title = first_match.strip()
    assert title == "All products | Books to Scrape - Sandbox"
def test_baseurl(my_page) -> None:
    """``base_url`` on the book-list page equals the expected address."""
    base = my_page.base_url
    assert base == "http://books.toscrape.com/index.html"
def test_urljoin(my_page) -> None:
    """``urljoin`` resolves the relative path ``foo`` to an absolute URL."""
    joined = my_page.urljoin("foo")
    assert joined == "http://books.toscrape.com/foo"
def test_custom_baseurl() -> None:
    """A ``<base href>`` in the document changes the page's base URL.

    ``url`` must still report the response's own address, while ``base_url``
    and relative ``urljoin`` calls must use the ``<base>`` target. An
    absolute URL passed to ``urljoin`` must come back unchanged.
    """
    # The body element is explicitly closed so the markup is well-formed and
    # the <base> tag is the only thing this fixture exercises.
    body = b"""
    <html>
    <head>
        <base href="http://example.com/foo/">
    </head>
    <body></body>
    </html>
    """
    response = HttpResponse(
        url="http://www.example.com/path",
        body=body,
    )
    page = MyPage(response=response)

    assert page.url == "http://www.example.com/path"
    assert page.base_url == "http://example.com/foo/"
    assert page.urljoin("bar") == "http://example.com/foo/bar"
    assert page.urljoin("http://example.com/1") == "http://example.com/1"
|