1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137
|
import json
import numcodecs
import numpy as np
import pytest
from packaging.version import Version
import zarr
from zarr.abc.codec import SupportsSyncCodec
from zarr.codecs import BloscCodec
from zarr.codecs.blosc import BloscShuffle, Shuffle
from zarr.core.array_spec import ArrayConfig, ArraySpec
from zarr.core.buffer import default_buffer_prototype
from zarr.core.dtype import UInt16, get_data_type_from_native_dtype
from zarr.storage import MemoryStore, StorePath
@pytest.mark.parametrize("dtype", ["uint8", "uint16"])
async def test_blosc_evolve(dtype: str) -> None:
    """
    Check the Blosc settings written to ``zarr.json`` for a default ``BloscCodec()``.

    Two arrays are created in one in-memory store, a plain one and one created with
    ``shards=``. For both, the stored Blosc configuration must report a ``typesize``
    equal to the dtype's item size, and a ``shuffle`` of ``"bitshuffle"`` for 1-byte
    items or ``"shuffle"`` otherwise.
    """
    itemsize = np.dtype(dtype).itemsize
    wanted_shuffle = "bitshuffle" if itemsize == 1 else "shuffle"

    def check_blosc_config(config: dict[str, object]) -> None:
        # Same two expectations apply to the plain and the sharded array.
        assert config["typesize"] == itemsize
        assert config["shuffle"] == wanted_shuffle

    store = MemoryStore()

    # --- plain array: Blosc configuration is read from codecs[1] ---
    plain_path = "blosc_evolve"
    zarr.create_array(
        StorePath(store, plain_path),
        shape=(16, 16),
        chunks=(16, 16),
        dtype=dtype,
        fill_value=0,
        compressors=BloscCodec(),
    )
    plain_raw = await store.get(f"{plain_path}/zarr.json", prototype=default_buffer_prototype())
    assert plain_raw is not None
    plain_meta = json.loads(plain_raw.to_bytes())
    check_blosc_config(plain_meta["codecs"][1]["configuration"])

    # --- array created with shards: Blosc configuration is nested one level deeper,
    # under the configuration of the first top-level codec ---
    sharded_path = "blosc_evolve_sharding"
    zarr.create_array(
        StorePath(store, sharded_path),
        shape=(16, 16),
        chunks=(16, 16),
        shards=(16, 16),
        dtype=dtype,
        fill_value=0,
        compressors=BloscCodec(),
    )
    sharded_raw = await store.get(
        f"{sharded_path}/zarr.json", prototype=default_buffer_prototype()
    )
    assert sharded_raw is not None
    sharded_meta = json.loads(sharded_raw.to_bytes())
    outer_configuration = sharded_meta["codecs"][0]["configuration"]
    check_blosc_config(outer_configuration["codecs"][1]["configuration"])
@pytest.mark.parametrize("shuffle", [None, "bitshuffle", BloscShuffle.shuffle])
@pytest.mark.parametrize("typesize", [None, 1, 2])
def test_tunable_attrs_param(shuffle: None | Shuffle | BloscShuffle, typesize: None | int) -> None:
    """
    Check how ``None`` arguments to ``BloscCodec`` are recorded and later evolved.

    When ``shuffle`` or ``typesize`` is ``None``, the codec must fall back to its default
    value and list the attribute name in ``_tunable_attrs``. After
    ``evolve_from_array_spec`` with a ``UInt16`` array spec, attributes that were ``None``
    must take the values expected for that dtype, while explicitly given ones must be
    left as they were.
    """
    shuffle_unset = shuffle is None
    typesize_unset = typesize is None

    initial = BloscCodec(typesize=typesize, shuffle=shuffle)

    if shuffle_unset:
        assert initial.shuffle == BloscShuffle.bitshuffle  # default shuffle
        assert "shuffle" in initial._tunable_attrs
    if typesize_unset:
        assert initial.typesize == 1  # default typesize
        assert "typesize" in initial._tunable_attrs

    uint16 = UInt16()
    spec = ArraySpec(
        shape=(1,),
        dtype=uint16,
        fill_value=1,
        prototype=default_buffer_prototype(),
        config={},  # type: ignore[arg-type]
    )
    evolved = initial.evolve_from_array_spec(array_spec=spec)

    # Unset values follow the new dtype; explicit values survive evolution unchanged.
    assert evolved.typesize == (uint16.item_size if typesize_unset else initial.typesize)
    assert evolved.shuffle == (BloscShuffle.shuffle if shuffle_unset else initial.shuffle)
async def test_typesize() -> None:
    """
    Check the stored size of the first Blosc-compressed chunk of a ``uint64`` array.

    The expected size differs by installed numcodecs version: 402 bytes from 0.16.0
    onwards, 10216 bytes before that.
    NOTE(review): presumably the difference comes from how the Blosc typesize is
    chosen in each version; confirm against the numcodecs changelog.
    """
    a = np.arange(1000000, dtype=np.uint64)
    codecs = [zarr.codecs.BytesCodec(), zarr.codecs.BloscCodec()]
    # ``(10000)`` is just the int 10000; spell the 1-D chunk shape as a real tuple.
    z = zarr.array(a, chunks=(10000,), codecs=codecs)

    data = await z.store.get("c/0", prototype=default_buffer_prototype())
    assert data is not None

    # Named ``chunk_bytes`` rather than ``bytes`` so the builtin is not shadowed.
    chunk_bytes = data.to_bytes()
    size = len(chunk_bytes)
    # The message reports 20 bytes from each end, matching the slices below.
    msg = (
        f"Blosc size mismatch. First 20 bytes: {chunk_bytes[:20]!r} "
        f"and last 20 bytes: {chunk_bytes[-20:]!r}"
    )

    if Version(numcodecs.__version__) >= Version("0.16.0"):
        expected_size = 402
    else:
        expected_size = 10216
    assert size == expected_size, msg
def test_blosc_codec_supports_sync() -> None:
    """A default-constructed ``BloscCodec`` must pass the ``SupportsSyncCodec`` check."""
    codec = BloscCodec()
    assert isinstance(codec, SupportsSyncCodec)
def test_blosc_codec_sync_roundtrip() -> None:
    """
    Round-trip a float64 array through ``BloscCodec``'s synchronous encode/decode.

    The array is handed to ``_encode_sync`` as a byte view, the result is passed to
    ``_decode_sync``, and the decoded bytes reinterpreted as float64 must equal the
    original values.
    """
    expected = np.arange(100, dtype="float64")
    float_dtype = get_data_type_from_native_dtype(expected.dtype)
    chunk_spec = ArraySpec(
        shape=expected.shape,
        dtype=float_dtype,
        fill_value=float_dtype.cast_scalar(0),
        config=ArrayConfig(order="C", write_empty_chunks=True),
        prototype=default_buffer_prototype(),
    )
    blosc = BloscCodec(typesize=8)

    # The codec is given the array as unsigned bytes ("B" view), not as float64.
    raw_input = default_buffer_prototype().buffer.from_array_like(expected.view("B"))

    compressed = blosc._encode_sync(raw_input, chunk_spec)
    assert compressed is not None

    restored = blosc._decode_sync(compressed, chunk_spec)
    actual = np.frombuffer(restored.as_numpy_array(), dtype="float64")
    np.testing.assert_array_equal(expected, actual)
|