1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233
|
import json
import pathlib
import unittest
from typing import Callable
from recipe_scrapers import SCRAPERS, scrape_html
from recipe_scrapers._exceptions import StaticValueException
from recipe_scrapers._grouping_utils import IngredientGroup
# Scraper methods that are exercised for every test case.
# For each name, the generated test either compares the method's value with the
# entry of the same name in the expected-output JSON or, when the JSON has no
# such entry, asserts that calling the method raises an exception.
# The names are checked in the order listed here.
MANDATORY_TESTS = [
"author",
"canonical_url",
"host",
"image",
"ingredients",
"instructions_list",
"language",
"site_name",
"title",
"total_time",
"yields",
]
# Scraper methods that are only exercised when the expected-output JSON
# contains an entry of the same name; names missing from the JSON are skipped
# rather than being required to raise.
# The names are checked in the order listed here.
OPTIONAL_TESTS = [
"ingredient_groups",
"instructions",
"category",
"description",
"cook_time",
"cuisine",
"nutrients",
"prep_time",
"cooking_method",
"keywords",
"ratings",
"equipment",
"ratings_count",
"dietary_restrictions",
]
class RecipeTestCase(unittest.TestCase):
    """
    Container for the data-driven scraper tests.

    The class body defines no test methods itself; they are attached
    dynamically, one per testhtml/testjson pair, by prepare_test_cases().
    """

    # Shared across all generated tests: flipped to True by the first test that
    # runs against a host missing from SCRAPERS, so that a second such test
    # fails its "only one wild mode test" assertion.
    been_wild = False

    # Never truncate the diff shown when an assertEqual comparison fails.
    maxDiff = None
def _call_scraper_func(scraper_func: Callable):
    """
    Call a scraper method and return its value.

    A StaticValueException is not treated as a failure: the value carried in its
    ``return_value`` attribute is returned instead. Any other exception
    propagates to the caller.
    """
    try:
        return scraper_func()
    except StaticValueException as e:
        return e.return_value


def test_func_factory(
    host: str, testhtml: pathlib.Path, testjson: pathlib.Path
) -> Callable:
    """
    Factory function to create a test function that asserts the actual output from
    the scraper matches the expected output.

    Parameters
    ----------
    host : str
        Host of the site, used to identify the correct scraper to use*
    testhtml : pathlib.Path
        Path to the testhtml file that the scraper will parse.
    testjson : pathlib.Path
        Path to the testjson file that contains the expected output from the
        scraper for the testhtml file.

    * We can't use the canonical url from the expected output to determine the
    scraper that should be used because some websites that aggregate recipes from
    other sites set the canonical url to the site the recipe came from.
    tastykitchen.com is an example of this.

    Returns
    -------
    Callable
        Function that asserts the expected output from the scraper matches the
        actual output.
    """

    def test_func(self):
        expect = json.loads(testjson.read_text(encoding="utf-8"))

        # Convert the expected ingredient groups into IngredientGroup objects so
        # they compare equal to what the scraper returns. Without explicit
        # groups, the expectation is a single purpose-less group holding every
        # ingredient.
        if "ingredient_groups" in expect:
            expect["ingredient_groups"] = [
                IngredientGroup(**group) for group in expect["ingredient_groups"]
            ]
        elif "ingredients" in expect:
            expect["ingredient_groups"] = [
                IngredientGroup(expect["ingredients"], purpose=None)
            ]
        # When neither key is present nothing is synthesised: the mandatory
        # "ingredients" check below then asserts that the scraper raises,
        # instead of this setup step failing with a KeyError.

        # Hosts without a registered scraper are parsed in "wild mode".
        supported_only = host in SCRAPERS
        actual = scrape_html(
            html=testhtml.read_text(encoding="utf-8"),
            org_url=host,
            online=False,
            supported_only=supported_only,
        )
        if not supported_only:
            self.assertFalse(self.been_wild, "Only one wild mode test should occur.")
            # Set on the class, not the instance, so later tests can see it.
            type(self).been_wild = True

        # Mandatory tests
        # If the key isn't present, check an exception is raised
        for key in MANDATORY_TESTS:
            with self.subTest(key):
                # Looked up outside assertRaises so that a missing method is
                # reported as an error rather than counted as the expected
                # exception.
                scraper_func = getattr(actual, key)
                if key in expect:
                    self.assertEqual(
                        expect[key],
                        _call_scraper_func(scraper_func),
                        msg=f"The actual value for .{key}() did not match the expected value.",
                    )
                else:
                    with self.assertRaises(
                        Exception,
                        msg=f".{key}() was expected to raise an exception but it did not.",
                    ):
                        scraper_func()

        # Optional tests
        for key in OPTIONAL_TESTS:
            if key not in expect:
                continue  # If the key isn't present, skip
            with self.subTest(key):
                scraper_func = getattr(actual, key)
                self.assertEqual(
                    expect[key],
                    _call_scraper_func(scraper_func),
                    msg=f"The actual value for .{key}() did not match the expected value.",
                )

        # Cross-check: the grouped ingredients must contain exactly the same
        # items as the flat ingredient list. Skipped when the expected output
        # has no ingredients, because .ingredients() is then required to raise.
        if "ingredients" in expect:
            with self.subTest("ingredient_groups"):
                grouped = [
                    ingredient
                    for group in actual.ingredient_groups()
                    for ingredient in group.ingredients
                ]
                self.assertEqual(sorted(actual.ingredients()), sorted(grouped))

        # Cross-check: the non-blank lines of .instructions() must match the
        # non-blank entries of the expected instructions_list.
        if "instructions_list" in expect:
            list_instructions_normalized = [
                line.strip() for line in expect["instructions_list"] if line.strip()
            ]
            string_instructions_normalized = [
                instruction.strip()
                for instruction in actual.instructions().split("\n")
                if instruction.strip()
            ]
            with self.subTest("instructions_list vs instructions comparison"):
                self.assertEqual(
                    string_instructions_normalized,
                    list_instructions_normalized,
                    msg="The actual value for .instructions() did not match the value from instructions_list.",
                )

    return test_func
def prepare_test_cases():
    """
    Populate RecipeTestCase with one generated test method per scraper fixture.

    Every sub-directory of tests/test_data is treated as a host. Within it, each
    ``*.testhtml`` file that has a sibling ``.json`` file with the same stem
    yields one test method; testhtml files without that JSON file are skipped.
    """
    data_root = pathlib.Path("tests/test_data")
    host_dirs = (entry for entry in data_root.iterdir() if entry.is_dir())
    for host_dir in host_dirs:
        for html_path in host_dir.glob("*.testhtml"):
            json_path = html_path.with_suffix(".json")
            if json_path.is_file():
                # The method is named after the path to the testjson file.
                # NOTE: that relative path starts with "tests/", so the name
                # begins with "test", the default prefix unittest's loader uses
                # to recognise test methods. Keep the path relative.
                setattr(
                    RecipeTestCase,
                    str(json_path),
                    test_func_factory(host_dir.name, html_path, json_path),
                )
# Run at import time so the generated test methods are already attached to
# RecipeTestCase when load_tests() collects tests from that class.
prepare_test_cases()
def load_tests(
    loader: unittest.TestLoader, standard_tests: unittest.TestSuite, pattern: str
) -> unittest.TestSuite:
    """
    Build the test suite for this package. The unittest test loader picks this
    function up automatically (the ``load_tests`` protocol).

    The suite combines the generated scraper tests on RecipeTestCase with the
    library tests discovered in the tests/library folder.

    Parameters
    ----------
    loader : unittest.TestLoader
        The instance of TestLoader loading the tests when unittest is run
    standard_tests : unittest.TestSuite
        The tests found by loader by loading the tests from the tests module.
        This is unused.
    pattern : str
        Pattern used to identify tests to load.
        This is unused.

    Returns
    -------
    unittest.TestSuite
        A TestSuite object populated with tests from the pairs of testhtml and
        testjson files, and the library tests.
    """
    # All test methods attached to the RecipeTestCase definition
    recipe_tests = loader.loadTestsFromTestCase(RecipeTestCase)

    # Library tests are added individually after the recipe tests
    combined = unittest.TestSuite([recipe_tests])
    combined.addTests(loader.discover("tests/library"))
    return combined
|