File: test_zarr.py

package info (click to toggle)
kerchunk 0.2.9-2
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 135,172 kB
  • sloc: python: 6,477; makefile: 39
file content (99 lines) | stat: -rw-r--r-- 2,693 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
import fsspec
import numpy as np
import pandas as pd
import pytest
import xarray as xr
import fsspec.implementations.reference as reffs
import ujson

import kerchunk.combine
import kerchunk.zarr
import kerchunk.utils


@pytest.fixture(scope="module")
def ds():
    """Build the synthetic dataset used by the tests in this module.

    The dataset has three one-dimensional variables, each named after its
    own dimension:

    * ``x``: 10 evenly spaced values from ``-pi`` to ``pi``
    * ``y``: 10 evenly spaced values from ``-pi/2`` to ``pi/2``
    * ``time``: the timestamps of ``pd.date_range("2020", "2021")``

    plus a ``temp`` variable formed as ``cos(x) * sin(y)`` multiplied by a
    float array of ones shaped like ``time`` and by one random factor per
    time step.

    The fixture is module-scoped, so it is built once and the same object is
    handed to every test that requests it. The random factors are unseeded
    and therefore differ between test sessions.
    """
    x_values = np.linspace(-np.pi, np.pi, 10)
    y_values = np.linspace(-np.pi / 2, np.pi / 2, 10)
    timestamps = pd.date_range("2020", "2021")

    dataset = xr.Dataset(
        {
            "x": xr.DataArray(x_values, dims=["x"]),
            "y": xr.DataArray(y_values, dims=["y"]),
            "time": xr.DataArray(timestamps, dims=["time"]),
        },
    )

    # Spatial pattern first, then extend it along time, then scale each time
    # step by its own random factor (a plain NumPy array shaped like time).
    spatial_pattern = np.cos(dataset.x) * np.sin(dataset.y)
    ones_along_time = xr.ones_like(dataset.time).astype("float")
    random_factors = np.random.random(dataset.time.shape)
    dataset["temp"] = spatial_pattern * ones_along_time * random_factors

    return dataset


@pytest.fixture
def zarr_in_zip(tmpdir, ds):
    """Write ``ds`` as a Zarr v2 store and pack that store into a zip file.

    The store is written to ``<tmpdir>/test.zarr``; every file found beneath
    it is then added to ``<tmpdir>/test.zarr.zip`` under its path relative to
    the store root, using ``ZIP_STORED`` (no compression) with Zip64 enabled.

    Returns:
        The path of the zip archive, as a string.
    """
    import os
    import zipfile

    store_dir = f"{tmpdir}/test.zarr"
    ds.to_zarr(store_dir, mode="w", zarr_format=2)

    archive_path = store_dir + os.path.extsep + "zip"
    with zipfile.ZipFile(
        archive_path, "w", compression=zipfile.ZIP_STORED, allowZip64=True
    ) as archive:
        for dirpath, _, names in os.walk(store_dir):
            for name in names:
                full_path = os.path.join(dirpath, name)
                # Member names are relative to the store root, so the store
                # root is also the root of the archive.
                member_name = os.path.relpath(full_path, store_dir)
                archive.write(full_path, member_name)

    return archive_path


def test_zarr_in_zip(zarr_in_zip, ds):
    """Translate a zipped Zarr store to references and read it back.

    Checks that the dataset opened through the kerchunk engine equals the
    original ``ds``, and that the ``temp/.zarray`` metadata entry is held in
    the reference set as a ``str`` or ``bytes`` value.
    """
    translator = kerchunk.zarr.ZarrToZarr(
        url="zip://", storage_options={"fo": zarr_in_zip}
    )
    refs = translator.translate()

    # The references point into the zip archive, so the reader has to be told
    # how to reach it.
    backend_kwargs = {
        "storage_options": {
            "remote_protocol": "zip",
            "remote_options": {"fo": zarr_in_zip},
        }
    }
    reopened = xr.open_dataset(
        refs,
        engine="kerchunk",
        backend_kwargs=backend_kwargs,
    )
    assert ds.equals(reopened)

    # tests inlining of metadata
    reference_fs = fsspec.filesystem(
        "reference",
        fo=refs,
        remote_protocol="zip",
        remote_options={"fo": zarr_in_zip},
    )
    temp_array_meta = reference_fs.references["temp/.zarray"]
    assert isinstance(temp_array_meta, (str, bytes))


def test_zarr_combine(tmpdir, ds):
    """Combine a single translated Zarr store into a parquet reference set.

    ``ds`` is written as a Zarr v2 store, translated with
    ``inline_threshold=0``, and passed as the only input to
    ``MultiZarrToZarr`` (concatenating along ``time``) with a
    ``LazyReferenceMapper`` as the output. The dataset opened from the
    resulting ``out.parq`` path must equal ``ds``.
    """
    store_path = f"{tmpdir}/test1.zarr"
    parquet_path = f"{tmpdir}/out.parq"

    ds.to_zarr(store_path, zarr_format=2)
    single_refs = kerchunk.zarr.ZarrToZarr(store_path, inline_threshold=0).translate()

    lazy_out = reffs.LazyReferenceMapper.create(parquet_path)
    combiner = kerchunk.combine.MultiZarrToZarr(
        [single_refs],
        concat_dims=["time"],
        out=lazy_out,
    )
    combiner.translate()

    reopened = xr.open_dataset(parquet_path, engine="kerchunk")
    assert ds.equals(reopened)


def test_zarr_json_dump_succeeds(tmpdir, ds):
    """Check that a translated reference set can be serialised with ujson.

    ``ds`` is written as a Zarr v2 store and translated with
    ``inline_threshold=0``; the result is then passed to ``ujson.dumps``.
    """
    store_path = f"{tmpdir}/test1.zarr"
    ds.to_zarr(store_path, zarr_format=2)

    translator = kerchunk.zarr.ZarrToZarr(store_path, inline_threshold=0)
    refs = translator.translate()

    # No assertion on the output: the test passes as long as dumping the
    # references does not raise.
    ujson.dumps(refs)