import pathlib
import unittest
from unittest import mock
from warnings import catch_warnings, simplefilter

from recipe_scrapers import (
    NoSchemaFoundInWildMode,
    WebsiteNotImplementedError,
    get_supported_urls,
    scrape_html,
    scraper_exists_for,
)
from recipe_scrapers._utils import get_host_name


class TestMainMethods(unittest.TestCase):
    def test_valid_call_formats(self):
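        """Equivalent positional and keyword call styles should all be accepted."""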
        test_html = "<!-- load this variable with the HTML from the URL below -->"
        test_url = "https://en.wikibooks.org/wiki/Cookbook:B%C3%A9chamel_Sauce_(Beeton)"

        # These calls should all be equivalent and valid.
        scrape_html(test_html, test_url)
        scrape_html(test_html, org_url=test_url)  # 'org_url' is short for 'original url'
        scrape_html(html=test_html, org_url=test_url)
        # scrape_html(html=test_html, url=test_url)  # TODO

    def test_invalid_call_formats(self):
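        """Combining supported_only and wild_mode should raise ValueError and warn of deprecation."""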
        invalid_combinations = (
            (True, True),
            (True, False),
            (False, True),
            (False, False),
        )

        for supported_only, wild_mode in invalid_combinations:
            with self.subTest(supported_only=supported_only, wild_mode=wild_mode):
                with catch_warnings(record=True) as ws:
                    simplefilter("always", category=DeprecationWarning)
                    with self.assertRaises(ValueError):
                        scrape_html(
                            html="<html></html>",
                            org_url="https://recipe-scrapers.example/",
                            supported_only=supported_only,
                            wild_mode=wild_mode,
                        )
                    self.assertTrue(
                        any(w.category is DeprecationWarning for w in ws)
                    )

    def test_get_supported_urls(self):
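        """get_supported_urls should list the host names of the supported sites."""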
        urls = get_supported_urls()
        self.assertGreater(len(urls), 200)
        self.assertIn(get_host_name("https://www.hellofresh.nl/"), urls)
        self.assertIn(get_host_name("https://hellofresh.com/"), urls)

    def test_scraper_exists_for(self):
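        """scraper_exists_for should accept bare host names as well as full recipe URLs."""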
        self.assertFalse(scraper_exists_for("example.com"))
        self.assertTrue(scraper_exists_for("https://www.hellofresh.nl/"))
        self.assertTrue(
            scraper_exists_for("https://eatsmarter.de/rezepte/gruenkohl-kokos-suppe")
        )

    @mock.patch("recipe_scrapers.requests.get")
    def test_offline_no_html_retrieval(self, mock_get):
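        """With online=False and no HTML, scrape_html should fail without any HTTP request."""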
        with self.assertRaises(ValueError):
            scrape_html(
                html=None,
                org_url="https://recipe-scrapers.example/algorithmic-cupcakes.html",
                online=False,
                supported_only=False,
            )

        mock_get.assert_not_called()

    @mock.patch("recipe_scrapers.requests.get")
    def test_online_mode_html_retrieval(self, mock_get):
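        """With online=True, scrape_html should fetch the page itself and emit a DeprecationWarning."""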
        recipe_html = pathlib.Path(
            "tests/test_data/recipe-scrapers.example/online.testhtml"
        )
        mock_get.return_value = mock.MagicMock()
        mock_get.return_value.text = recipe_html.read_text()

        with catch_warnings(record=True) as ws:
            simplefilter("always", category=DeprecationWarning)
            scrape_html(
                html=None,
                org_url="https://recipe-scrapers.example/algorithmic-cupcakes.html",
                online=True,
                supported_only=False,
            )
            self.assertTrue(any(w.category is DeprecationWarning for w in ws))

        mock_get.assert_called()

    def test_unsupported_website(self):
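        """Unsupported sites raise WebsiteNotImplementedError or, in wild mode, NoSchemaFoundInWildMode."""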
        html, url = (
            "<!DOCTYPE html><html><body>arbitrary</body></html>",
            "https://unsupported.recipe-scrapers.example/unavailable.html",
        )

        with self.assertRaises(WebsiteNotImplementedError):
            scrape_html(html=html, org_url=url, online=False)

        with self.assertRaises(WebsiteNotImplementedError):
            scrape_html(html=html, org_url=url, online=False, supported_only=True)

        with self.assertRaises(NoSchemaFoundInWildMode):
            with catch_warnings(record=True) as ws:
                simplefilter("always", category=DeprecationWarning)
                scrape_html(html=html, org_url=url, online=False, wild_mode=True)

        self.assertTrue(any(w.category is DeprecationWarning for w in ws))