import os
import fsspec
import numpy as np
from packaging.version import Version
import pytest
from kerchunk import netCDF3
from fsspec.implementations.asyn_wrapper import AsyncFileSystemWrapper
from kerchunk.utils import refs_as_store
# Skip this entire module when xarray is not installed.
xr = pytest.importorskip("xarray")
# Some behavior exercised below only exists from xarray 2023.8.0 onwards.
has_xarray_2023_8_0 = Version(xr.__version__) >= Version("2023.8.0")
# Shared fixture data: a random 2-D field, serialized once to NETCDF3_CLASSIC
# bytes (`bdata`) so tests can pipe it into an in-memory filesystem.
arr = np.random.rand(1, 10, 10)
data = xr.DataArray(
    data=arr.squeeze(),
    dims=["x", "y"],
    name="data",
)
bdata = xr.Dataset({"data": data}, attrs={"attr0": 3}).to_netcdf(
    format="NETCDF3_CLASSIC"
)
def test_one(m):
    """Round-trip the module-level dataset through kerchunk via a memory store.

    ``m`` is the fsspec in-memory filesystem fixture; the pre-serialized
    NETCDF3_CLASSIC bytes (``bdata``) are piped into it, kerchunked, and the
    resulting reference store is opened with the zarr engine.
    """
    m.pipe("data.nc3", bdata)
    h = netCDF3.netcdf_recording_file("memory://data.nc3")
    out = h.translate()
    # (debug print of the reference dict removed)
    store = refs_as_store(out)
    ds = xr.open_dataset(
        store,
        engine="zarr",
        backend_kwargs={
            "consolidated": False,
            "zarr_format": 2,
        },
    )
    # The decoded values must match the source DataArray exactly.
    assert (ds.data == data).all()
@pytest.fixture()
def unlimited_dataset(tmpdir):
    """Write a NETCDF3_CLASSIC file with an unlimited ``time`` dimension.

    Returns the path to the file on disk.
    """
    # https://unidata.github.io/netcdf4-python/#creatingopeningclosing-a-netcdf-file
    from netCDF4 import Dataset

    path = os.path.join(tmpdir, "test.nc")
    nc = Dataset(path, "w", format="NETCDF3_CLASSIC")
    nc.createDimension("time", None)
    nc.createDimension("lat", 10)
    nc.createDimension("lon", 5)
    nc.createVariable("time", "f8", ("time",))
    # reference_time is an unbounded dimension that is a half byte long, so it
    # has padding to line up to take up exactly one byte. It is here to test
    # that kerchunk can handle the padding correctly and read following
    # variables correctly.
    nc.createVariable("reference_time", "h", ("time",))
    nc.title = "testing"
    lat_var = nc.createVariable("lat", "f4", ("lat",))
    lon_var = nc.createVariable("lon", "f4", ("lon",))
    temp_var = nc.createVariable("temp", "f4", ("time", "lat", "lon"))
    temp_var.units = "K"
    lat_var[:] = np.arange(-0.5, 0.5, 0.1)
    lon_var[:] = np.arange(0, 0.5, 0.1)
    # Eight records along the unlimited dimension, filled with random data.
    for step in range(8):
        temp_var[step] = np.random.uniform(size=(1, 10, 5))
    nc.close()
    return path
def test_unlimited(unlimited_dataset):
    """Kerchunked view of an unlimited-dimension file matches a direct scipy read."""
    path = unlimited_dataset
    expected = xr.open_dataset(path, engine="scipy")
    refs = netCDF3.NetCDF3ToZarr(path).translate()
    local_fs = AsyncFileSystemWrapper(fsspec.filesystem("file"))
    store = refs_as_store(refs, local_fs)
    actual = xr.open_zarr(
        store,
        zarr_format=2,
        consolidated=False,
    )
    # Global and variable attributes survive the translation.
    assert actual.attrs["title"] == "testing"
    assert actual.temp.attrs["units"] == "K"
    # All coordinate and data values are byte-identical to the direct read.
    for name in ("lat", "lon", "temp"):
        assert (actual[name].values == expected[name].values).all()
@pytest.fixture()
def matching_coordinate_dimension_dataset(tmpdir):
    """Create a dataset with a coordinate dimension that matches the name of a
    variable dimension."""
    # https://unidata.github.io/netcdf4-python/#creatingopeningclosing-a-netcdf-file
    from netCDF4 import Dataset

    path = os.path.join(tmpdir, "test.nc")
    nc = Dataset(path, "w", format="NETCDF3_64BIT")
    nc.createDimension("node", 2)
    nc.createDimension("sigma", 2)
    # "node" is both a dimension name and a coordinate-variable name.
    node_var = nc.createVariable("node", "i4", ("node",))
    sigma_var = nc.createVariable("sigma", "f8", ("sigma", "node"))
    node_var[:] = [0, 1]
    for row in range(2):
        sigma_var[row] = np.random.uniform(size=(2,))
    nc.close()
    return path
@pytest.mark.skipif(
    not has_xarray_2023_8_0, reason="XArray 2023.08.0 is required for this behavior."
)
def test_matching_coordinate_dimension(matching_coordinate_dimension_dataset):
    """A coordinate variable that shares its dimension's name round-trips intact."""
    path = matching_coordinate_dimension_dataset
    expected = xr.open_dataset(path, engine="scipy")
    refs = netCDF3.NetCDF3ToZarr(path).translate()
    actual = xr.open_dataset(
        "reference://",
        engine="zarr",
        backend_kwargs={
            "consolidated": False,
            "storage_options": {"fo": refs},
        },
    )
    assert (actual.node.values == expected.node.values).all()
    assert (actual.sigma.values == expected.sigma.values).all()