1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99
|
import fsspec
import numpy as np
import pandas as pd
import pytest
import xarray as xr
import fsspec.implementations.reference as reffs
import ujson
import kerchunk.combine
import kerchunk.zarr
import kerchunk.utils
@pytest.fixture(scope="module")
def ds():
    """Build the synthetic dataset shared by the tests in this module.

    The dataset carries three coordinate variables -- ``x`` (10 points
    spanning [-pi, pi]), ``y`` (10 points spanning [-pi/2, pi/2]) and
    ``time`` (``pd.date_range("2020", "2021")``) -- plus a ``temp`` variable
    computed as ``cos(x) * sin(y)`` scaled by one noise factor per time step.

    The noise comes from a fixed-seed generator rather than the global NumPy
    random state, so every run sees the same values and a failing test can
    be reproduced.
    """
    dataset = xr.Dataset(
        {
            "x": xr.DataArray(np.linspace(-np.pi, np.pi, 10), dims=["x"]),
            "y": xr.DataArray(np.linspace(-np.pi / 2, np.pi / 2, 10), dims=["y"]),
            "time": xr.DataArray(pd.date_range("2020", "2021"), dims=["time"]),
        },
    )
    # Fixed seed: deterministic test data, independent of global RNG state.
    rng = np.random.default_rng(0)
    dataset["temp"] = (
        np.cos(dataset.x)
        * np.sin(dataset.y)
        # ones_like(time) only serves to broadcast the product along "time".
        * xr.ones_like(dataset.time).astype("float")
        * rng.random(dataset.time.shape)
    )
    return dataset
@pytest.fixture
def zarr_in_zip(tmpdir, ds):
    """Write ``ds`` as a Zarr (format 2) store and pack it into a zip archive.

    The store is written to ``<tmpdir>/test.zarr``; every file found under it
    is then added, uncompressed, to ``<tmpdir>/test.zarr.zip`` under its path
    relative to the store root.

    Returns the path of the zip archive as a string.
    """
    import os
    import zipfile

    store_dir = f"{tmpdir}/test.zarr"
    ds.to_zarr(store_dir, mode="w", zarr_format=2)

    archive_path = store_dir + os.path.extsep + "zip"
    with zipfile.ZipFile(
        archive_path, "w", compression=zipfile.ZIP_STORED, allowZip64=True
    ) as archive:
        for dirpath, _, names in os.walk(store_dir):
            for name in names:
                member = os.path.join(dirpath, name)
                # Archive names are relative to the store root so the zip
                # itself acts as the root of the Zarr store.
                archive.write(member, os.path.relpath(member, store_dir))
    return archive_path
def test_zarr_in_zip(zarr_in_zip, ds):
    """Reference a zipped Zarr store and check it reads back equal to ``ds``.

    Also checks that the ``temp/.zarray`` metadata entry is stored in the
    reference set as a ``str`` or ``bytes`` value.
    """
    translator = kerchunk.zarr.ZarrToZarr(
        url="zip://", storage_options={"fo": zarr_in_zip}
    )
    refs = translator.translate()

    backend_kwargs = {
        "storage_options": {
            "remote_protocol": "zip",
            "remote_options": {"fo": zarr_in_zip},
        }
    }
    reopened = xr.open_dataset(refs, engine="kerchunk", backend_kwargs=backend_kwargs)
    assert ds.equals(reopened)

    # tests inlining of metadata
    ref_fs = fsspec.filesystem(
        "reference",
        fo=refs,
        remote_protocol="zip",
        remote_options={"fo": zarr_in_zip},
    )
    zarray_entry = ref_fs.references["temp/.zarray"]
    assert isinstance(zarray_entry, (str, bytes))
def test_zarr_combine(tmpdir, ds):
    """Combine a single Zarr reference set into a lazy reference mapper.

    The references for one Zarr store are passed through ``MultiZarrToZarr``
    (concatenating on ``time``) with a ``LazyReferenceMapper`` created at
    ``<tmpdir>/out.parq`` as the output; the result must open as a dataset
    equal to ``ds``.
    """
    store_path = f"{tmpdir}/test1.zarr"
    parquet_path = f"{tmpdir}/out.parq"

    ds.to_zarr(store_path, zarr_format=2)
    single_refs = kerchunk.zarr.ZarrToZarr(store_path, inline_threshold=0).translate()

    lazy_out = reffs.LazyReferenceMapper.create(parquet_path)
    combiner = kerchunk.combine.MultiZarrToZarr(
        [single_refs], concat_dims=["time"], out=lazy_out
    )
    combiner.translate()

    reopened = xr.open_dataset(parquet_path, engine="kerchunk")
    assert ds.equals(reopened)
def test_zarr_json_dump_succeeds(tmpdir, ds):
    """Check that translated Zarr references can be serialized by ``ujson``.

    There is no explicit assertion: the test passes as long as
    ``ujson.dumps`` does not raise on the reference set.
    """
    store_path = f"{tmpdir}/test1.zarr"
    ds.to_zarr(store_path, zarr_format=2)

    translator = kerchunk.zarr.ZarrToZarr(store_path, inline_threshold=0)
    refs = translator.translate()

    ujson.dumps(refs)
|