# Tests for schema.org JSON-LD parsing (SchemaOrg) and for image selection in
# scrapers built by SchemaScraperFactory.
import unittest
from recipe_scrapers._factory import SchemaScraperFactory
from recipe_scrapers._schemaorg import SchemaOrg
from recipe_scrapers.settings import settings
# HTML page skeleton for the tests: the ``{jsonld}`` placeholder is filled via
# ``str.format`` with one of the JSON-LD fixture strings. Because the template
# goes through ``str.format``, any literal brace added to it must be doubled.
# NOTE(review): the <link> element carries type="canonical" rather than
# rel="canonical" — confirm whether that is intentional.
JSONLD_PAGE_TEMPLATE = """
<html>
<head>
<link href="http://recipe.test/template" type="canonical" />
<script type="application/ld+json">{jsonld}</script>
</head>
</html>
"""
# JSON-LD fixture: a single schema.org Recipe object with a name, two
# ingredients and one instruction string.
SIMPLE_SCHEMA = """
{
"@context": "https://schema.org",
"@type": "Recipe",
"name": "Test Recipe",
"recipeIngredient": ["1 slice of bread", "5g margarine"],
"recipeInstructions": ["spread the margarine on the bread"]
}
"""
# JSON-LD fixture: an array of three entities.
#   1. A Recipe with @id http://recipe.test/template that only carries a name.
#   2. A second Recipe with a different @id and a different name.
#   3. A WebPage whose mainEntity is a Recipe with the same @id as (1) and
#      carries the ingredients and instructions.
# The objects must not end with a trailing comma: strict JSON parsers such as
# ``json.loads`` reject it, and the fixture should not depend on a lenient
# fallback parser to be readable.
MULTI_ENTITY_SCHEMA = """
[
{
"@context": "https://schema.org",
"@type": "Recipe",
"@id": "http://recipe.test/template",
"name": "Test Recipe"
},
{
"@context": "https://schema.org",
"@type": "Recipe",
"@id": "http://recipe.test/other",
"name": "Another great test recipe"
},
{
"@context": "https://schema.org",
"@type": "WebPage",
"@id": "http://recipe.test/template",
"mainEntity": {
"@context": "https://schema.org",
"@type": "Recipe",
"@id": "http://recipe.test/template",
"recipeIngredient": ["1 slice of bread", "5g margarine"],
"recipeInstructions": ["spread the margarine on the bread"]
}
}
]
"""
# JSON-LD fixture: a Recipe whose "image" list mixes two plain URL strings
# (320x240 first, 640x480 last) with an ImageObject that declares an explicit
# width and height (1280x720). Ingredient and instruction lists are empty.
BEST_IMAGE_SCHEMA = """
{
"@context": "https://schema.org",
"@type": "Recipe",
"name": "Image Comparison",
"image": [
"https://images.example.com/recipe-320x240.jpg",
{
"@type": "ImageObject",
"url": "https://images.example.com/recipe-1280x720.jpg",
"width": 1280,
"height": 720
},
"https://images.example.com/recipe-640x480.jpg"
],
"recipeIngredient": [],
"recipeInstructions": []
}
"""
class TestSchemaOrg(unittest.TestCase):
    """Checks schema.org JSON-LD handling using the inline HTML fixtures."""

    def test_simple(self):
        # Single Recipe entity: title, both ingredients and the instruction
        # text must be readable from the parser.
        html = JSONLD_PAGE_TEMPLATE.format(jsonld=SIMPLE_SCHEMA)
        schema = SchemaOrg(html)

        self.assertEqual("Test Recipe", schema.title())
        for ingredient in ("1 slice of bread", "5g margarine"):
            self.assertIn(ingredient, schema.ingredients())
        self.assertEqual("spread the margarine on the bread", schema.instructions())

    def test_multi_entity_aggregation(self):
        # The name sits on a top-level Recipe entity while the ingredients and
        # instructions sit on the WebPage's mainEntity with the same @id; the
        # parser is expected to expose all of them.
        html = JSONLD_PAGE_TEMPLATE.format(jsonld=MULTI_ENTITY_SCHEMA)
        schema = SchemaOrg(html)

        self.assertEqual("Test Recipe", schema.title())
        for ingredient in ("1 slice of bread", "5g margarine"):
            self.assertIn(ingredient, schema.ingredients())
        self.assertEqual("spread the margarine on the bread", schema.instructions())

    def test_best_image_selection(self):
        html = JSONLD_PAGE_TEMPLATE.format(jsonld=BEST_IMAGE_SCHEMA)
        page_url = "http://recipe.test/template"

        # No best_image argument: the ImageObject URL (1280x720) is expected.
        scraper_with_defaults = SchemaScraperFactory.generate(html=html, url=page_url)
        self.assertEqual(
            "https://images.example.com/recipe-1280x720.jpg",
            scraper_with_defaults.image(),
        )

        # best_image=False: the first listed image URL is expected.
        scraper_without_selection = SchemaScraperFactory.generate(
            html=html, url=page_url, best_image=False
        )
        self.assertEqual(
            "https://images.example.com/recipe-320x240.jpg",
            scraper_without_selection.image(),
        )

    def test_best_image_setting_toggle(self):
        html = JSONLD_PAGE_TEMPLATE.format(jsonld=BEST_IMAGE_SCHEMA)

        # Force the global setting on for this test only; the previous value
        # is restored even if the assertion fails.
        previous_value = settings.BEST_IMAGE_SELECTION
        try:
            settings.BEST_IMAGE_SELECTION = True
            scraper = SchemaScraperFactory.generate(
                html=html, url="http://recipe.test/template"
            )
            self.assertEqual(
                "https://images.example.com/recipe-1280x720.jpg",
                scraper.image(),
            )
        finally:
            settings.BEST_IMAGE_SELECTION = previous_value
|