File: test_schemaorg.py

package info (click to toggle)
python-recipe-scrapers 15.10.0-1
  • links: PTS
  • area: main
  • in suites: forky, sid
  • size: 289,728 kB
  • sloc: python: 14,218; makefile: 3
file content (132 lines) | stat: -rw-r--r-- 3,917 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
import unittest

from recipe_scrapers._factory import SchemaScraperFactory
from recipe_scrapers._schemaorg import SchemaOrg
from recipe_scrapers.settings import settings

# Minimal HTML page used by the tests below: a <head> holding one <link> to
# http://recipe.test/template and one JSON-LD <script> element. The `{jsonld}`
# placeholder is filled in with `str.format(jsonld=...)` by each test.
# NOTE(review): the <link> carries type="canonical" rather than rel="canonical";
# confirm whether that is intentional before changing it.
JSONLD_PAGE_TEMPLATE = """
<html>
<head>
<link href="http://recipe.test/template" type="canonical" />
<script type="application/ld+json">{jsonld}</script>
</head>
</html>
"""

# JSON-LD payload describing a single schema.org Recipe with a name, two
# ingredients and one instruction step. Consumed by `test_simple`.
SIMPLE_SCHEMA = """
{
  "@context": "https://schema.org",
  "@type": "Recipe",
  "name": "Test Recipe",
  "recipeIngredient": ["1 slice of bread", "5g margarine"],
  "recipeInstructions": ["spread the margarine on the bread"]
}
"""

# JSON-LD payload holding an array of three entities:
#   1. a Recipe with @id http://recipe.test/template that only carries a name;
#   2. a second Recipe with a different @id and a different name;
#   3. a WebPage whose `mainEntity` is a Recipe reusing the first @id and
#      carrying the ingredients and instructions (but no name).
# `test_multi_entity_aggregation` expects the name of entity 1 together with
# the ingredients/instructions of entity 3's `mainEntity`.
# NOTE(review): the first two objects end with a trailing comma after "name",
# which strict JSON does not allow; presumably the JSON-LD extraction in use
# tolerates it -- confirm before "fixing" the literal.
MULTI_ENTITY_SCHEMA = """
[
  {
    "@context": "https://schema.org",
    "@type": "Recipe",
    "@id": "http://recipe.test/template",
    "name": "Test Recipe",
  },
  {
    "@context": "https://schema.org",
    "@type": "Recipe",
    "@id": "http://recipe.test/other",
    "name": "Another great test recipe",
  },
  {
    "@context": "https://schema.org",
    "@type": "WebPage",
    "@id": "http://recipe.test/template",
    "mainEntity": {
      "@context": "https://schema.org",
      "@type": "Recipe",
      "@id": "http://recipe.test/template",
      "recipeIngredient": ["1 slice of bread", "5g margarine"],
      "recipeInstructions": ["spread the margarine on the bread"]
    }
  }
]
"""
# JSON-LD payload for a Recipe whose `image` is a three-item list: a plain URL
# string (320x240 in its file name), an ImageObject declaring width 1280 and
# height 720, and another plain URL string (640x480 in its file name).
# Ingredient and instruction lists are deliberately empty. Consumed by the two
# best-image tests.
BEST_IMAGE_SCHEMA = """
{
  "@context": "https://schema.org",
  "@type": "Recipe",
  "name": "Image Comparison",
  "image": [
    "https://images.example.com/recipe-320x240.jpg",
    {
      "@type": "ImageObject",
      "url": "https://images.example.com/recipe-1280x720.jpg",
      "width": 1280,
      "height": 720
    },
    "https://images.example.com/recipe-640x480.jpg"
  ],
  "recipeIngredient": [],
  "recipeInstructions": []
}
"""


class TestSchemaOrg(unittest.TestCase):
    """Check JSON-LD recipe extraction against small inline HTML pages.

    Each test renders ``JSONLD_PAGE_TEMPLATE`` with one of the module-level
    JSON-LD payloads and inspects what the parser or scraper reports.
    """

    def _assert_bread_recipe(self, parser):
        """Assert that *parser* exposes the bread-and-margarine test recipe."""
        self.assertEqual("Test Recipe", parser.title())

        # Fetch the ingredients once; both membership checks share the result.
        ingredients = parser.ingredients()
        self.assertIn("1 slice of bread", ingredients)
        self.assertIn("5g margarine", ingredients)

        self.assertEqual("spread the margarine on the bread", parser.instructions())

    def test_simple(self):
        """A page holding a single Recipe entity yields that recipe's data."""
        page_data = JSONLD_PAGE_TEMPLATE.format(jsonld=SIMPLE_SCHEMA)

        self._assert_bread_recipe(SchemaOrg(page_data))

    def test_multi_entity_aggregation(self):
        """Data spread over entities sharing one ``@id`` is combined.

        The expected title lives on the top-level Recipe, while the expected
        ingredients and instructions live on the WebPage's ``mainEntity``.
        """
        page_data = JSONLD_PAGE_TEMPLATE.format(jsonld=MULTI_ENTITY_SCHEMA)

        self._assert_bread_recipe(SchemaOrg(page_data))

    def test_best_image_selection(self):
        """The ``best_image`` argument controls which listed image is returned."""
        page_data = JSONLD_PAGE_TEMPLATE.format(jsonld=BEST_IMAGE_SCHEMA)

        # No explicit ``best_image``: the 1280x720 ImageObject URL is expected.
        default_scraper = SchemaScraperFactory.generate(
            html=page_data,
            url="http://recipe.test/template",
        )
        self.assertEqual(
            "https://images.example.com/recipe-1280x720.jpg",
            default_scraper.image(),
        )

        # ``best_image=False``: the first entry of the image list is expected.
        first_image_scraper = SchemaScraperFactory.generate(
            html=page_data,
            url="http://recipe.test/template",
            best_image=False,
        )
        self.assertEqual(
            "https://images.example.com/recipe-320x240.jpg",
            first_image_scraper.image(),
        )

    def test_best_image_setting_toggle(self):
        """``settings.BEST_IMAGE_SELECTION`` drives image choice when unset.

        Both values are exercised. The default scraper in
        ``test_best_image_selection`` already returns the 1280x720 image, so
        checking ``True`` alone could not detect the setting being ignored;
        the ``False`` case is the one that proves the setting is honoured.
        """
        page_data = JSONLD_PAGE_TEMPLATE.format(jsonld=BEST_IMAGE_SCHEMA)

        # Register the restore before touching the shared settings object so
        # the original value comes back however the test ends.
        self.addCleanup(
            setattr,
            settings,
            "BEST_IMAGE_SELECTION",
            settings.BEST_IMAGE_SELECTION,
        )

        # NOTE(review): assumes a scraper generated without ``best_image``
        # reads the setting at construction time and that ``False`` behaves
        # like ``best_image=False`` -- confirm against the scraper base class.
        expectations = (
            (True, "https://images.example.com/recipe-1280x720.jpg"),
            (False, "https://images.example.com/recipe-320x240.jpg"),
        )
        for enabled, expected_image in expectations:
            with self.subTest(best_image_selection=enabled):
                settings.BEST_IMAGE_SELECTION = enabled
                configured_scraper = SchemaScraperFactory.generate(
                    html=page_data,
                    url="http://recipe.test/template",
                )
                self.assertEqual(expected_image, configured_scraper.image())