File: __init__.py

package info (click to toggle)
python-recipe-scrapers 15.9.0-1
  • links: PTS
  • area: main
  • in suites: forky, sid
  • size: 246,580 kB
  • sloc: python: 13,214; makefile: 3
file content (233 lines) | stat: -rw-r--r-- 7,336 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
import json
import pathlib
import unittest
from typing import Callable

from recipe_scrapers import SCRAPERS, scrape_html
from recipe_scrapers._exceptions import StaticValueException
from recipe_scrapers._grouping_utils import IngredientGroup

# Scraper methods exercised for every test case. When a key is present in the
# expected JSON, the value returned by the scraper must equal it; when the key
# is absent, calling the scraper method must raise an exception.
MANDATORY_TESTS = [
    "author",
    "canonical_url",
    "host",
    "image",
    "ingredients",
    "instructions_list",
    "language",
    "site_name",
    "title",
    "total_time",
    "yields",
]

# Scraper methods that are only checked when the expected JSON contains the
# corresponding key; keys missing from the expected JSON are skipped.
OPTIONAL_TESTS = [
    "ingredient_groups",
    "instructions",
    "category",
    "description",
    "cook_time",
    "cuisine",
    "nutrients",
    "prep_time",
    "cooking_method",
    "keywords",
    "ratings",
    "equipment",
    "ratings_count",
    "dietary_restrictions",
]


class RecipeTestCase(unittest.TestCase):
    """
    Test case class that receives one generated test method per pair of
    testhtml and testjson files (attached by prepare_test_cases).
    """
    # None removes unittest's length limit on the diffs shown for failed
    # assertions, so large mismatches are printed in full.
    maxDiff = None
    # Flipped to True by the first generated test that runs with
    # supported_only=False; each such test asserts the flag is still False
    # beforehand, so a second one fails.
    been_wild = False


def _load_expected(testjson: pathlib.Path) -> dict:
    """
    Load the expected scraper output for one test case.

    The "ingredient_groups" entry of the returned dict is always populated with
    IngredientGroup objects: either built from the groups listed in the file or,
    when the file lists none, a single group with purpose=None wrapping the
    expected "ingredients".

    Parameters
    ----------
    testjson : pathlib.Path
        Path to the testjson file holding the expected output.

    Returns
    -------
    dict
        The parsed expected output.

    Raises
    ------
    KeyError
        If the file contains neither "ingredient_groups" nor "ingredients".
    """
    # Read the whole file up front so the handle is closed before any
    # post-processing takes place.
    expect = json.loads(testjson.read_text(encoding="utf-8"))

    if "ingredient_groups" in expect:
        groups = [IngredientGroup(**group) for group in expect["ingredient_groups"]]
    else:
        groups = [IngredientGroup(expect["ingredients"], purpose=None)]
    expect["ingredient_groups"] = groups
    return expect


def _call_scraper_method(scraper_func: Callable):
    """
    Call a scraper method and return its result.

    If the method raises StaticValueException, the value carried on the
    exception (its return_value attribute) is returned instead.
    """
    try:
        return scraper_func()
    except StaticValueException as e:
        return e.return_value


def test_func_factory(
    host: str, testhtml: pathlib.Path, testjson: pathlib.Path
) -> Callable:
    """
    Factory function to create a test function that asserts the actual output from
    the scraper matches the expected output.

    Parameters
    ----------
    host : str
        Host of the site, used to identify the correct scraper to use*
    testhtml : pathlib.Path
        Path to the testhtml file that the scraper will parse.
    testjson : pathlib.Path
        Path to the testjson file that contains the expected output from the
        scraper for the testhtml file.


    * We can't use the canonical url from the expected output to determine the scraper
    that should be used because some websites that aggregate recipes from other sites
    will set the canonical url to the site the recipe came from. tastykitchen.com is
    an example of this.


    Returns
    -------
    Callable
        Function that asserts the expected output from the scraper matches the
        actual output.
    """

    def test_func(self):
        expect = _load_expected(testjson)

        # Hosts without a registered scraper are parsed with
        # supported_only=False ("wild mode").
        supported_only = host in SCRAPERS
        actual = scrape_html(
            html=testhtml.read_text(encoding="utf-8"),
            org_url=host,
            online=False,
            supported_only=supported_only,
        )
        if not supported_only:
            self.assertFalse(self.been_wild, "Only one wild mode test should occur.")
            # Recorded on the class so the flag is shared by every generated test.
            type(self).been_wild = True

        # Mandatory tests
        # If the key isn't present, check an exception is raised
        for key in MANDATORY_TESTS:
            with self.subTest(key):
                scraper_func = getattr(actual, key)
                if key in expect:
                    self.assertEqual(
                        expect[key],
                        _call_scraper_method(scraper_func),
                        msg=f"The actual value for .{key}() did not match the expected value.",
                    )
                else:
                    # Called directly (not via the helper) so that any exception,
                    # including StaticValueException, satisfies the assertion.
                    with self.assertRaises(
                        Exception,
                        msg=f".{key}() was expected to raise an exception but it did not.",
                    ):
                        scraper_func()

        # Optional tests
        for key in OPTIONAL_TESTS:
            if key not in expect:
                continue  # If the key isn't present, skip
            with self.subTest(key):
                self.assertEqual(
                    expect[key],
                    _call_scraper_method(getattr(actual, key)),
                    msg=f"The actual value for .{key}() did not match the expected value.",
                )

        # The ingredients spread across the groups must be exactly the flat
        # ingredient list (order is ignored). The scraper calls sit inside the
        # subTest so a raising scraper is reported under this label.
        with self.subTest("ingredient_groups"):
            grouped = [
                ingredient
                for group in actual.ingredient_groups()
                for ingredient in group.ingredients
            ]
            self.assertEqual(sorted(actual.ingredients()), sorted(grouped))

        # The newline-separated .instructions() string must contain the same
        # non-blank steps as the expected instructions_list.
        if "instructions_list" in expect:
            with self.subTest("instructions_list vs instructions comparison"):
                list_instructions_normalized = [
                    line.strip() for line in expect["instructions_list"] if line.strip()
                ]

                instructions = _call_scraper_method(actual.instructions)
                string_instructions_normalized = [
                    instruction.strip()
                    for instruction in instructions.split("\n")
                    if instruction.strip()
                ]

                self.assertEqual(
                    string_instructions_normalized,
                    list_instructions_normalized,
                    msg="The actual value for .instructions() did not match the value from instructions_list.",
                )

    return test_func


def prepare_test_cases():
    """
    Populate RecipeTestCase with one test method per test case on disk.

    Every sub-directory of tests/test_data is treated as a host. For each
    *.testhtml file inside it that has a sibling .json file, a test function is
    built with test_func_factory and attached to RecipeTestCase.
    """
    data_root = pathlib.Path("tests/test_data")
    host_dirs = (entry for entry in data_root.iterdir() if entry.is_dir())

    for host_dir in host_dirs:
        for html_path in host_dir.glob("*.testhtml"):
            json_path = html_path.with_suffix(".json")
            if json_path.is_file():
                # Attach a new method to RecipeTestCase to test this scraper.
                # The name of the method is the path to the testjson file.
                generated_test = test_func_factory(host_dir.name, html_path, json_path)
                setattr(RecipeTestCase, str(json_path), generated_test)


# Run at import time so the generated test methods are already attached to
# RecipeTestCase when load_tests() loads tests from it.
prepare_test_cases()


def load_tests(
    loader: unittest.TestLoader, standard_tests: unittest.TestSuite, pattern: str
) -> unittest.TestSuite:
    """
    Build the test suite for this package.

    unittest calls this hook automatically (the load_tests protocol). The
    returned suite combines the generated RecipeTestCase tests with the library
    tests discovered under the tests/library folder.


    Parameters
    ----------
    loader : unittest.TestLoader
        The TestLoader instance loading the tests when unittest is run.
    standard_tests : unittest.TestSuite
        The tests the loader found by default in the tests module.
        Ignored by this function.
    pattern : str
        Pattern used to identify tests to load.
        Ignored by this function.

    Returns
    -------
    unittest.TestSuite
        A TestSuite holding the tests generated from the pairs of testhtml and
        testjson files, followed by the library tests.
    """

    # Seed the suite with every test method defined on RecipeTestCase
    recipe_tests = loader.loadTestsFromTestCase(RecipeTestCase)
    combined = unittest.TestSuite([recipe_tests])

    # Append the library tests
    combined.addTests(loader.discover("tests/library"))

    return combined