File: test_combine_dask.py

package info (click to toggle)
kerchunk 0.2.9-2
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 135,172 kB
  • sloc: python: 6,477; makefile: 39
file content (42 lines) | stat: -rw-r--r-- 1,303 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
import pytest
import re

import fsspec
import xarray as xr

from kerchunk.combine import auto_dask
from kerchunk.utils import refs_as_store
from kerchunk.zarr import ZarrToZarr

# Skip every test in this module when dask cannot be imported.
dask = pytest.importorskip("dask")


@pytest.mark.parametrize("n_batches", [1, 2, 3])
def test_simplest(m, n_batches):
    """Combine four tiny zarr groups with ``auto_dask`` and check the result.

    Four groups ``data0`` .. ``data3`` are written through ``m``, each holding
    one uncompressed 3-element ``|i1`` array named ``data`` whose single chunk
    is the group's index digit repeated three times as ASCII. The groups are
    combined for each parametrized ``n_batches`` and the output is opened with
    xarray to verify the ``count`` coordinate and the stacked data values.

    Parameters
    ----------
    m
        Fixture supplied outside this file; presumably the in-memory fsspec
        filesystem, since the inputs are read back through ``memory://``
        URLs -- confirm against conftest.
    n_batches
        Number of batches forwarded to ``auto_dask``.
    """
    n_inputs = 4

    # Every input shares the same array metadata: one chunk of three
    # uncompressed, unfiltered signed bytes.
    zarray_meta = (
        b'{"chunks":[3],"compressor": null,"dtype": "|i1",'
        b'"fill_value": 0,"filters": null,"order": "C",'
        b'"shape": [3],"zarr_format": 2}'
    )
    for idx in range(n_inputs):
        group = f"data{idx}"
        m.pipe(
            {
                f"{group}/.zgroup": b'{"zarr_format":2}',
                f"{group}/data/.zarray": zarray_meta,
                # Chunk payload is the index digit three times, e.g. b"222".
                f"{group}/data/0": (str(idx) * 3).encode(),
            }
        )

    urls = [f"memory:///data{idx}" for idx in range(n_inputs)]
    # The regex captures the last digit of what it is matched against; the
    # assertions below expect those digits to come back as the "count" values.
    combine_options = {
        "coo_map": {"count": re.compile(r".*(\d)")},
        "inline_threshold": 0,
        "coo_dtypes": {"count": "i4"},
    }
    out = auto_dask(
        urls,
        single_driver=ZarrToZarr,
        single_kwargs={"inline": 0},
        n_batches=n_batches,
        mzz_kwargs=combine_options,
    )

    store = refs_as_store(out)
    ds = xr.open_dataset(
        store, engine="zarr", backend_kwargs={"consolidated": False}
    )
    assert ds["count"].values.tolist() == [0, 1, 2, 3]

    stacked = ds["data"]
    assert stacked.shape == (4, 3)
    # 48..51 are the ASCII codes of "0".."3": row k holds digit k three times.
    assert (stacked.values.T == [48, 49, 50, 51]).all()