# Benchmarks for combining xarray datasets.
import numpy as np
import xarray as xr
from . import requires_dask
class Combine1d:
    """Time ``xr.combine_by_coords`` on a pair of one-dimensional datasets."""

    def setup(self) -> None:
        """Build two single-variable datasets over consecutive ``T`` ranges.

        Both datasets store the same random values under the variable ``"A"``;
        only the ``T`` coordinate differs, the second one being offset by the
        length of the first.
        """
        n_steps = 8000
        first_steps = np.arange(n_steps)
        values = np.random.randn(n_steps)

        first = xr.DataArray(values, coords={"T": first_steps}, dims="T")
        self.dsA0 = xr.Dataset({"A": first})

        second_steps = first_steps + n_steps
        second = xr.DataArray(values, coords={"T": second_steps}, dims="T")
        self.dsA1 = xr.Dataset({"A": second})

    def time_combine_by_coords(self) -> None:
        """Time combining both datasets by coordinates.

        This also has to load and arrange the ``T`` coordinate.
        """
        xr.combine_by_coords([self.dsA0, self.dsA1])
class Combine1dDask(Combine1d):
    """Run the inherited 1-D combine timing on chunked, many-variable datasets."""

    def setup(self) -> None:
        """Build two datasets of 500 chunked variables over consecutive ``T`` ranges.

        Every variable in both datasets refers to the same chunked
        ``xr.Variable``; the datasets differ only in their ``T`` coordinate.
        """
        # NOTE(review): presumably makes the benchmark unavailable when dask
        # is not installed -- confirm against the helper's definition.
        requires_dask()

        n_steps = 8000
        steps = np.arange(n_steps)
        chunked = xr.Variable(dims=("T",), data=np.random.randn(n_steps)).chunk()

        variables = {}
        for index in range(500):
            variables[f"long_name_{index}"] = ("T", chunked)

        self.dsA0 = xr.Dataset(variables, coords={"T": steps})
        self.dsA1 = xr.Dataset(variables, coords={"T": steps + n_steps})
class Combine3d:
    """Benchmark concatenating and merging large three-dimensional datasets."""

    def setup(self) -> None:
        """Create 4 datasets holding two different variables.

        ``dsA0``/``dsA1`` carry variable ``"A"`` and ``dsB0``/``dsB1`` carry
        variable ``"B"``. The ``*0`` datasets use ``T = 0..t_size-1`` and the
        ``*1`` datasets use the following ``t_size`` values, so each pair is
        contiguous along ``T``. All four share the same random data array.
        """
        t_size, x_size, y_size = 50, 450, 400
        t = np.arange(t_size)
        data = np.random.randn(t_size, x_size, y_size)

        def make_dataset(name, t_coord):
            # Local helper rather than a method, so the benchmark class
            # exposes no attributes beyond setup and the time_* methods.
            array = xr.DataArray(data, coords={"T": t_coord}, dims=("T", "X", "Y"))
            return xr.Dataset({name: array})

        self.dsA0 = make_dataset("A", t)
        self.dsA1 = make_dataset("A", t + t_size)
        self.dsB0 = make_dataset("B", t)
        self.dsB1 = make_dataset("B", t + t_size)

    def time_combine_nested(self) -> None:
        """Time ``xr.combine_nested`` on a 2x2 nested list of the datasets.

        The inner lists group datasets with the same variable and are
        combined along ``"T"``; the outer level uses ``None`` in ``concat_dim``.
        """
        datasets = [[self.dsA0, self.dsA1], [self.dsB0, self.dsB1]]

        xr.combine_nested(datasets, concat_dim=[None, "T"])

    def time_combine_by_coords(self) -> None:
        """Time ``xr.combine_by_coords`` on the flat list of all four datasets.

        Also has to load and arrange the ``T`` coordinate.
        """
        datasets = [self.dsA0, self.dsA1, self.dsB0, self.dsB1]

        xr.combine_by_coords(datasets)