1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37
|
from contextlib import contextmanager
from io import BytesIO
from pathlib import Path
from lxml.etree import iterparse
# Absolute path of the directory containing this module; the resource helpers
# in this file join caller-supplied paths onto it.
__HERE__ = Path(__file__).parent.absolute()
def _parse_xml(data, strip_ns=False):
    """Parse an XML document held in a string and return its root element.

    Args:
        data: The XML document as ``str``; it is encoded to UTF-8 bytes
            before being handed to the parser.
        strip_ns: When true, every element tag containing ``}`` is reduced
            to the part after the first ``}`` (i.e. the namespace prefix is
            dropped).

    Returns:
        The ``root`` attribute of the ``iterparse`` iterator once the whole
        document has been consumed.

    Raises:
        ValueError: If anything goes wrong while parsing. The message carries
            the original error text and the ``repr`` of the encoded input,
            cut to 35 characters when longer. The original exception is
            chained as the cause.
    """
    raw = bytes(data, "utf8")
    try:
        parser = iterparse(BytesIO(raw))
        for _event, element in parser:
            tag = element.tag
            if "}" in tag and strip_ns:  # pragma: no branch
                # Keep only what follows the first "}" of the tag.
                element.tag = tag.partition("}")[2]
        return parser.root
    except Exception as err:  # pragma: no cover
        snippet = repr(raw)
        # Keep the error message short for large documents.
        snippet = snippet if len(snippet) <= 35 else f"{snippet[:35]} ..."
        raise ValueError(f"Unable to parse XML: {err} ({snippet})") from err
@contextmanager
def text(path, encoding="utf8"):
    """Context manager yielding an open text-mode handle for a resource file.

    Args:
        path: Location of the resource, joined onto this module's directory.
        encoding: Text encoding used to open the file (``"utf8"`` by default).

    Yields:
        The file object opened for reading; it is closed when the ``with``
        block of the caller exits.
    """
    resource = __HERE__.joinpath(path)
    handle = resource.open("r", encoding=encoding)
    # The file object is its own context manager, so entering it hands back
    # the very same handle and guarantees it is closed afterwards.
    with handle:
        yield handle
@contextmanager
def xml(path, encoding="utf8"):
    """Context manager yielding the parsed root element of an XML resource.

    Args:
        path: Location of the resource, joined onto this module's directory.
        encoding: Text encoding used to read the file (``"utf8"`` by default).

    Yields:
        The result of ``_parse_xml`` for the file's full contents, called
        with ``strip_ns=True``. The file remains open until the caller's
        ``with`` block exits.
    """
    resource = __HERE__.joinpath(path)
    with resource.open("r", encoding=encoding) as stream:
        markup = stream.read()
        root = _parse_xml(markup, strip_ns=True)
        yield root
|