1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140
|
from __future__ import annotations
import logging
from dataclasses import dataclass
from functools import lru_cache
from pathlib import Path
from runpy import run_path
from typing import Any, Optional, Tuple, Type, Union
import rdflib.util
import test.data
from rdflib.graph import Graph, _GraphT
from rdflib.util import guess_format
GraphSourceType = Union["GraphSource", Path]
SUFFIX_FORMAT_MAP = {**rdflib.util.SUFFIX_FORMAT_MAP, "hext": "hext"}
@dataclass(frozen=True)
class GraphSource:
path: Path
format: str
public_id: Optional[str] = None
@classmethod
def guess_format(cls, path: Path) -> Optional[str]:
format: Optional[str]
if path.suffix == ".py":
format = "python"
else:
format = guess_format(f"{path}", SUFFIX_FORMAT_MAP)
return format
@classmethod
def from_path(
cls, path: Path, public_id: Optional[str] = None, format: Optional[str] = None
) -> GraphSource:
if format is None:
format = cls.guess_format(path)
if format is None:
raise ValueError(f"could not guess format for source {path}")
return cls(path, format, public_id)
@classmethod
def from_paths(cls, *paths: Path) -> Tuple[GraphSource, ...]:
result = []
for path in paths:
result.append(cls.from_path(path))
return tuple(result)
@classmethod
def from_source(
cls, source: GraphSourceType, public_id: Optional[str] = None
) -> GraphSource:
logging.debug("source(%s) = %r", id(source), source)
if isinstance(source, Path):
source = GraphSource.from_path(source)
return source
def public_id_or_path_uri(self) -> str:
if self.public_id is not None:
self.public_id
return self.path.as_uri()
def load(
self,
graph: Optional[_GraphT] = None,
public_id: Optional[str] = None,
# type error: Incompatible default for argument "graph_type" (default has type "Type[Graph]", argument has type "Type[_GraphT]")
# see https://github.com/python/mypy/issues/3737
graph_type: Type[_GraphT] = Graph, # type: ignore[assignment]
) -> _GraphT:
if graph is None:
graph = graph_type()
if self.format == "python":
load_from_python(self.path, graph, graph_type)
else:
graph.parse(
source=self.path,
format=self.format,
publicID=self.public_id if public_id is None else public_id,
)
return graph
@classmethod
def idfn(cls, val: Any) -> Optional[str]:
"""ID function for GraphSource objects.
Args:
val: The value to try to generate and identifier for.
Returns:
A string identifying the given value if the value is a
`GraphSource`, otherwise return `None`.
"""
if isinstance(val, cls):
try:
path_string = f"{val.path.relative_to(test.data.TEST_DATA_DIR)}"
except ValueError:
path_string = f"{val.path}"
return f"GS({path_string}, {val.format}, {val.public_id})"
return None
def load_sources(
*sources: GraphSourceType,
graph: Optional[_GraphT] = None,
public_id: Optional[str] = None,
graph_type: Type[_GraphT] = Graph, # type: ignore[assignment]
) -> _GraphT:
if graph is None:
graph = graph_type()
for source in sources:
GraphSource.from_source(source).load(graph, public_id)
return graph
@lru_cache(maxsize=None)
def cached_graph(
sources: Tuple[Union[GraphSource, Path], ...],
public_id: Optional[str] = None,
graph_type: Type[_GraphT] = Graph, # type: ignore[assignment]
) -> _GraphT:
return load_sources(*sources, public_id=public_id, graph_type=graph_type)
def load_from_python(
path: Path,
graph: Optional[_GraphT] = None,
graph_type: Type[_GraphT] = Graph, # type: ignore[assignment]
) -> _GraphT:
if graph is None:
graph = graph_type()
mod = run_path(f"{path}")
if "populate_graph" not in mod:
raise ValueError(f"{path} does not contain a `populate_graph` function")
mod["populate_graph"](graph)
return graph
|