File: test_netcdf.py

package info (click to toggle)
kerchunk 0.2.9-2
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 135,172 kB
  • sloc: python: 6,477; makefile: 39
file content (147 lines) | stat: -rw-r--r-- 4,232 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
import os

import fsspec
import numpy as np
from packaging.version import Version
import pytest
from kerchunk import netCDF3

from fsspec.implementations.asyn_wrapper import AsyncFileSystemWrapper
from kerchunk.utils import refs_as_store

xr = pytest.importorskip("xarray")


# Gate for tests that rely on behavior introduced in xarray 2023.08.0.
has_xarray_2023_8_0 = Version(xr.__version__) >= Version("2023.8.0")


# Module-level fixture data: a small random 2-D field serialized to a
# classic (v3) netCDF byte string, consumed by test_one below.
arr = np.random.rand(1, 10, 10)
data = xr.DataArray(arr.squeeze(), dims=("x", "y"), name="data")
bdata = xr.Dataset({"data": data}, attrs={"attr0": 3}).to_netcdf(
    format="NETCDF3_CLASSIC"
)


def test_one(m):
    """Round-trip an in-memory netCDF3 blob through kerchunk references.

    ``m`` is the in-memory filesystem fixture: the module-level ``bdata``
    bytes are written to it, scanned with the netCDF3 recorder, and the
    resulting reference set is opened back through the zarr engine.
    """
    m.pipe("data.nc3", bdata)
    h = netCDF3.netcdf_recording_file("memory://data.nc3")
    out = h.translate()

    store = refs_as_store(out)

    ds = xr.open_dataset(
        store,
        engine="zarr",
        backend_kwargs={
            "consolidated": False,
            "zarr_format": 2,
        },
    )
    # Values read back via references must match the original DataArray.
    assert (ds.data == data).all()


@pytest.fixture()
def unlimited_dataset(tmpdir):
    """Write a classic netCDF3 file with an unlimited (record) dimension.

    See https://unidata.github.io/netcdf4-python/#creatingopeningclosing-a-netcdf-file
    Variable creation order is intentional: the record-variable layout is
    what the consuming test exercises.
    """
    from netCDF4 import Dataset

    fn = os.path.join(tmpdir, "test.nc")
    root = Dataset(fn, "w", format="NETCDF3_CLASSIC")

    root.createDimension("time", None)  # unlimited record dimension
    root.createDimension("lat", 10)
    root.createDimension("lon", 5)

    root.createVariable("time", "f8", ("time",))
    # reference_time is a short record variable, so each record carries
    # padding to align the following variable; it exists to check that
    # kerchunk handles that padding and still reads subsequent variables
    # correctly.
    root.createVariable("reference_time", "h", ("time",))
    root.title = "testing"

    lat_var = root.createVariable("lat", "f4", ("lat",))
    lon_var = root.createVariable("lon", "f4", ("lon",))
    temp_var = root.createVariable("temp", "f4", ("time", "lat", "lon"))
    temp_var.units = "K"

    lat_var[:] = np.arange(-0.5, 0.5, 0.1)
    lon_var[:] = np.arange(0, 0.5, 0.1)
    for rec in range(8):
        temp_var[rec] = np.random.uniform(size=(1, 10, 5))

    root.close()
    return fn


def test_unlimited(unlimited_dataset):
    """Kerchunked references to a record-dimension file match scipy's read."""
    fn = unlimited_dataset
    expected = xr.open_dataset(fn, engine="scipy")

    refs = netCDF3.NetCDF3ToZarr(fn).translate()

    async_fs = AsyncFileSystemWrapper(fsspec.filesystem("file"))
    store = refs_as_store(refs, async_fs)

    actual = xr.open_zarr(
        store,
        zarr_format=2,
        consolidated=False,
    )

    assert actual.attrs["title"] == "testing"
    assert actual.temp.attrs["units"] == "K"
    # Coordinate and data values must agree element-wise with the
    # scipy-backed reference read of the same file.
    for name in ("lat", "lon", "temp"):
        assert (actual[name].values == expected[name].values).all()


@pytest.fixture()
def matching_coordinate_dimension_dataset(tmpdir):
    """Create a dataset with a coordinate dimension that matches the name of a
    variable dimension."""
    # https://unidata.github.io/netcdf4-python/#creatingopeningclosing-a-netcdf-file
    from netCDF4 import Dataset

    fn = os.path.join(tmpdir, "test.nc")
    root = Dataset(fn, "w", format="NETCDF3_64BIT")

    for dim_name in ("node", "sigma"):
        root.createDimension(dim_name, 2)

    # "node" is both a dimension and a coordinate variable of that dimension.
    node_var = root.createVariable("node", "i4", ("node",))
    sigma_var = root.createVariable("sigma", "f8", ("sigma", "node"))

    node_var[:] = [0, 1]
    for row in range(2):
        sigma_var[row] = np.random.uniform(size=(2,))

    root.close()
    return fn


@pytest.mark.skipif(
    not has_xarray_2023_8_0, reason="XArray 2023.08.0 is required for this behavior."
)
def test_matching_coordinate_dimension(matching_coordinate_dimension_dataset):
    """A coordinate variable that shares its dimension name round-trips intact."""
    fn = matching_coordinate_dimension_dataset
    expected = xr.open_dataset(fn, engine="scipy")

    refs = netCDF3.NetCDF3ToZarr(fn).translate()

    actual = xr.open_dataset(
        "reference://",
        engine="zarr",
        backend_kwargs={
            "consolidated": False,
            "storage_options": {"fo": refs},
        },
    )

    # Both the coordinate and the dependent variable must match the
    # scipy-backed reference read element-wise.
    assert (actual.node.values == expected.node.values).all()
    assert (actual.sigma.values == expected.sigma.values).all()