1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65
|
# mypy: disallow_untyped_defs=False
import json
import unittest
from extruct.jsonld import JsonLdExtractor
from tests import get_testdata
class TestJsonLD(unittest.TestCase):
    """Fixture-driven tests for ``JsonLdExtractor``.

    Each test names a fixture ``folder`` and ``page``. The helper methods
    load ``<page>.html`` and ``<page>.jsonld`` through ``get_testdata``,
    run the extractor on the HTML body, and require the result to equal
    the JSON-decoded expectation.
    """

    # Comments (not docstrings) are used on the test methods so that the
    # names shown by verbose test runners are left unchanged.

    def test_schemaorg_CreativeWork(self):
        self.assertJsonLdCorrect(folder="schema.org", page="CreativeWork.001")

    def test_songkick(self):
        self.assertJsonLdCorrect(
            folder="songkick",
            page="Elysian Fields Brooklyn Tickets, The Owl Music Parlor, 31 Oct 2015",
        )

    def test_jsonld_empty_item(self):
        self.assertJsonLdCorrect(folder="songkick", page="jsonld_empty_item_test")

    def test_jsonld_with_comments(self):
        # The same two pages are checked in both fixture folders, in the
        # original order (all of "schema.org.invalid", then "custom.invalid").
        # subTest labels a failure with the folder/page that produced it.
        pages = ("JoinAction.001", "AllocateAction.001")
        for folder in ("schema.org.invalid", "custom.invalid"):
            for page in pages:
                with self.subTest(folder=folder, page=page):
                    self.assertJsonLdCorrect(folder=folder, page=page)

    def test_jsonld_with_control_characters(self):
        self.assertJsonLdCorrect(
            folder="custom.invalid", page="JSONLD_with_control_characters"
        )

    def test_jsonld_with_control_characters_comment(self):
        self.assertJsonLdCorrect(
            folder="custom.invalid", page="JSONLD_with_control_characters_comment"
        )

    def test_jsonld_with_json_including_js_comment(self):
        self.assertJsonLdCorrect(folder="custom.invalid", page="JSONLD_with_JS_comment")

    def test_null(self):
        # Routed through the shared helper instead of repeating the
        # load/extract/compare steps inline.
        self.assertJsonLdCorrect(folder="misc", page="null_ld_mock")

    def assertJsonLdCorrect(self, folder, page):
        """Assert that extracting ``<page>.html`` yields ``<page>.jsonld``.

        Args:
            folder: Fixture folder name passed to ``get_testdata``.
            page: Fixture base name, without the file extension.
        """
        body, expected = self._get_body_expected(folder, page)
        self._check_jsonld(body, expected)

    def _get_body_expected(self, folder, page):
        """Load one fixture pair.

        Returns:
            A ``(body, expected)`` tuple: ``body`` is whatever
            ``get_testdata`` returns for ``<page>.html``; ``expected`` is
            the ``<page>.jsonld`` payload decoded as UTF-8 and parsed as
            JSON.
        """
        body = get_testdata(folder, "{}.html".format(page))
        raw_expected = get_testdata(folder, "{}.jsonld".format(page))
        # NOTE(review): get_testdata presumably returns bytes, hence the
        # explicit decode before JSON parsing -- confirm against tests/.
        return body, json.loads(raw_expected.decode("utf8"))

    def _check_jsonld(self, body, expected):
        """Run a fresh ``JsonLdExtractor`` on ``body`` and compare to ``expected``."""
        extractor = JsonLdExtractor()
        self.assertEqual(extractor.extract(body), expected)
|