1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114
|
import sys
from pathlib import Path
sys.path.append(str(Path(__file__).parent.parent.absolute()))
from rdflib import Dataset, ConjunctiveGraph, Literal
from rdflib.namespace import XSD
def test_small_string():
s = """
["http://example.com/s01", "http://example.com/a", "http://example.com/Type1", "globalId", "", ""]
["http://example.com/s01", "http://example.com/label", "This is a Label", "http://www.w3.org/1999/02/22-rdf-syntax-ns#langString", "en", ""]
["http://example.com/s01", "http://example.com/comment", "This is a comment", "http://www.w3.org/2001/XMLSchema#string", "", ""]
["http://example.com/s01", "http://example.com/creationDate", "2021-12-01", "http://www.w3.org/2001/XMLSchema#date", "", ""]
["http://example.com/s01", "http://example.com/creationTime", "2021-12-01T12:13:00", "http://www.w3.org/2001/XMLSchema#dateTime", "", ""]
["http://example.com/s01", "http://example.com/age", "42", "http://www.w3.org/2001/XMLSchema#integer", "", ""]
["http://example.com/s01", "http://example.com/trueFalse", "false", ",http://www.w3.org/2001/XMLSchema#boolean", "", ""]
["http://example.com/s01", "http://example.com/op1", "http://example.com/o1", "globalId", "", ""]
["http://example.com/s01", "http://example.com/op1", "http://example.com/o2", "globalId", "", ""]
["http://example.com/s01", "http://example.com/op2", "http://example.com/o3", "globalId", "", ""]
"""
d = Dataset().parse(data=s, format="hext")
assert len(d) == 10
def test_small_file_singlegraph():
d = Dataset().parse(Path(__file__).parent / "test_parser_hext_singlegraph.ndjson", format="hext")
assert len(d) == 10
def test_small_file_multigraph():
d = ConjunctiveGraph()
assert len(d) == 0
d.parse(
Path(__file__).parent / "test_parser_hext_multigraph.ndjson",
format="hext",
publicID=d.default_context.identifier
)
"""There are 22 lines in the file test_parser_hext_multigraph.ndjson. When loaded
into a Dataset, we get only 18 quads since the the dataset can contextualise
the triples and thus deduplicate 4."""
total_triples = 0
# count all the triples in the Dataset
for context in d.contexts():
for triple in context.triples((None, None, None)):
total_triples += 1
assert total_triples == 18
def test_roundtrip():
# these are some RDF files that HexT can round-trip since the have no
# literals with no datatype declared:
TEST_DIR = Path(__file__).parent.absolute() / "nt"
files_to_skip = {
"paths-04.nt": "subject literal",
"even_more_literals.nt": "JSON decoding error",
"literals-02.nt": "JSON decoding error",
"more_literals.nt": "JSON decoding error",
"test.ntriples": "JSON decoding error",
"literals-05.nt": "JSON decoding error",
"i18n-01.nt": "JSON decoding error",
"literals-04.nt": "JSON decoding error",
"rdflibtest01.nt": "JSON decoding error",
"rdflibtest05.nt": "JSON decoding error",
}
tests = 0
skipped = 0
skip = False
print()
p = TEST_DIR.glob("**/*")
for f in [x for x in p if x.is_file()]:
tests += 1
print(f"Test {tests}: {f}")
if f.name not in files_to_skip.keys():
try:
cg = ConjunctiveGraph().parse(f, format="nt")
# print(cg.serialize(format="n3"))
except:
print(f"Skipping: could not NT parse")
skipped += 1
skip = True
if not skip:
cg2 = ConjunctiveGraph()
cg2.parse(
data=cg.serialize(format="hext"),
format="hext",
publicID=cg2.default_context.identifier
)
if cg2.context_aware:
for context in cg2.contexts():
for triple in context.triples((None, None, None)):
if type(triple[2]) == Literal:
if triple[2].datatype == XSD.string:
context.remove((triple[0], triple[1], triple[2]))
context.add((triple[0], triple[1], Literal(str(triple[2]))))
else:
for triple in cg2.triples((None, None, None)):
if type(triple[2]) == Literal:
if triple[2].datatype == XSD.string:
cg2.remove((triple[0], triple[1], triple[2]))
cg2.add((triple[0], triple[1], Literal(str(triple[2]))))
# print(cg2.serialize(format="trig"))
assert cg.isomorphic(cg2)
skip = False
else:
print(f"Skipping: {files_to_skip[f.name]}")
print(f"No. tests: {tests}")
print(f"No. tests skipped: {skipped}")
if __name__ == "__main__":
test_small_file_multigraph()
|