# Tests for uhi sparse-serialization round trips (to_sparse / from_sparse).
from __future__ import annotations
import json
from pathlib import Path
import numpy as np
import pytest
import uhi.io.json
from uhi.io import from_sparse, to_sparse
from uhi.typing.serialization import HistogramIR, WeightedStorageIR
def test_to_from_sparse_roundtrip() -> None:
    """Round-trip a small weighted histogram through the sparse representation."""
    # Dense starting point: a boolean axis crossed with a 3-bin regular axis.
    dense_hist: HistogramIR = {
        "uhi_schema": 1,
        "storage": {
            "type": "weighted",
            "values": np.array([[0, 1, 0], [0, 2, 0]], dtype=float),
            "variances": np.array([[0, 0.1, 0], [0, 0, 0]], dtype=float),
        },
        "axes": [
            {"type": "boolean"},
            {
                "type": "regular",
                "bins": 3,
                "overflow": False,
                "underflow": False,
                "lower": 0,
                "upper": 1,
                "circular": False,
            },
        ],
    }

    sparse_hist = to_sparse(dense_hist)
    storage: WeightedStorageIR = sparse_hist["storage"]  # type: ignore[assignment]

    # The sparse form must carry an index array: one row per dimension,
    # one column per retained bin.
    assert "index" in storage
    assert storage["index"].shape == (2, 2)

    # Retained entries must line up with the non-empty dense bins.
    assert np.all(storage["values"] == np.array([1.0, 2.0]))
    assert np.all(storage["variances"] == np.array([0.1, 0.0]))

    # Densifying again must reproduce every payload array.
    rebuilt = from_sparse(sparse_hist)
    for key, original in dense_hist["storage"].items():
        if key != "type":
            assert np.allclose(rebuilt["storage"][key], original)  # type: ignore[literal-required]
def test_all_valid(valid: Path) -> None:
    """Every valid resource file survives a sparse round trip unchanged."""
    text = valid.read_text(encoding="utf-8")
    loaded = json.loads(text, object_hook=uhi.io.json.object_hook)

    # Normalize: densify any histogram that arrived in sparse form.
    originals = {
        name: from_sparse(h) if "index" in h.get("storage", {}) else h
        for name, h in loaded.items()
    }

    sparse = {name: to_sparse(h) for name, h in originals.items()}
    for h in sparse.values():
        # Zero-dimensional histograms have nothing to index.
        if h["axes"]:
            assert "index" in h["storage"]

    roundtripped = {name: from_sparse(h) for name, h in sparse.items()}
    for h in roundtripped.values():
        assert "index" not in h["storage"]

    assert originals.keys() == roundtripped.keys()
    for before, after in zip(originals.values(), roundtripped.values()):
        assert before.keys() == after.keys()
        assert before["axes"] == after["axes"]
        assert before["storage"].keys() == after["storage"].keys()
        assert before["storage"]["values"] == pytest.approx(after["storage"]["values"])
        if before["storage"]["type"] == "weighted_mean":
            # Weighted-mean variances may legitimately be NaN; compare the
            # NaN masks and the remaining finite entries separately.
            b_var = before["storage"]["variances"]
            a_var = after["storage"]["variances"]
            assert np.all(np.isnan(b_var) == np.isnan(a_var))
            assert b_var[~np.isnan(b_var)] == pytest.approx(a_var[~np.isnan(a_var)])
            assert before["storage"]["sum_of_weights"] == pytest.approx(
                after["storage"]["sum_of_weights"]
            )
            assert before["storage"]["sum_of_weights_squared"] == pytest.approx(
                after["storage"]["sum_of_weights_squared"]
            )
        else:
            assert before["storage"].get("variances") == pytest.approx(
                after["storage"].get("variances")
            )
            assert before["storage"].get("counts") == pytest.approx(
                after["storage"].get("counts")
            )
@pytest.mark.parametrize("name", ["mean", "weighted_mean"])
def test_mean(resources: Path, name: str) -> None:
    """The mean.json fixtures compress to exactly two retained bins."""
    payload = resources.joinpath("valid/mean.json").read_text()
    hist = json.loads(payload, object_hook=uhi.io.json.object_hook)[name]

    storage = to_sparse(hist)["storage"]
    assert len(storage["values"]) == 2
    assert storage["index"].shape == (1, 2)