1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77
|
import numpy as np
import xarray as xr
class DatasetAddVariable:
    """Time adding a variable to, and merging, datasets of varying size.

    ``existing_elements`` is the number of variables each prepared dataset
    already holds before the timed operation runs.
    """

    param_names = ["existing_elements"]
    params = [[0, 10, 100, 1000]]

    def setup(self, existing_elements):
        """Build two datasets with ``existing_elements`` scalar variables each.

        The first dataset's variables are named ``var{i}`` and the second's
        ``set_2_{i}``, so the two share no variable names.
        """
        self.datasets = {}
        indices = range(existing_elements)

        # Collect everything in plain dicts and convert once per dataset:
        # dictionary insertion is fast(er) than xarray.Dataset insertion.
        first_vars = {f"var{i}": i for i in indices}
        second_vars = {f"set_2_{i}": i for i in indices}

        self.dataset = xr.merge([first_vars])
        self.dataset2 = xr.merge([second_vars])

    def time_variable_insertion(self, existing_elements):
        """Assign the scalar ``0`` under ``"new_var"`` on the prepared dataset.

        The assignment targets ``self.dataset`` itself; no copy is made.
        """
        self.dataset["new_var"] = 0

    def time_merge_two_datasets(self, existing_elements):
        """Merge the two prepared datasets; the result is discarded."""
        operands = [self.dataset, self.dataset2]
        xr.merge(operands)
class DatasetCreation:
    """Time building an ``xr.Dataset`` from NumPy and plain Python inputs.

    Benchmarks are parametrized by ``strategy`` (how each entry of
    ``data_vars`` is expressed) and ``count`` (how many variables are created).
    """

    # The idea here is to time how long it takes to go from numpy
    # and python data types, to a full dataset
    # See discussion
    # https://github.com/pydata/xarray/issues/7224#issuecomment-1292216344
    param_names = ["strategy", "count"]
    params = [
        ["dict_of_DataArrays", "dict_of_Variables", "dict_of_Tuples"],
        [0, 1, 10, 100, 1000],
    ]

    def setup(self, strategy, count):
        """Prepare coords, attrs and a ``data_vars`` factory for one run.

        Parameters
        ----------
        strategy : str
            How each data variable is expressed: ``"dict_of_DataArrays"``,
            ``"dict_of_Variables"`` or ``"dict_of_Tuples"``.
        count : int
            Number of entries the factory puts in the ``data_vars`` dict.

        Raises
        ------
        ValueError
            If ``strategy`` is not one of the supported names.
        """
        data = np.array(["0", "b"], dtype=str)
        self.dataset_coords = dict(time=np.array([0, 1]))
        self.dataset_attrs = dict(description="Test data")
        attrs = dict(units="Celsius")

        # Only a factory is stored here. It is called from the timed method,
        # so building the per-variable objects is part of the measurement.
        if strategy == "dict_of_DataArrays":

            def create_data_vars():
                return {
                    f"long_variable_name_{i}": xr.DataArray(
                        data=data, dims="time", attrs=attrs
                    )
                    for i in range(count)
                }

        elif strategy == "dict_of_Variables":

            def create_data_vars():
                return {
                    f"long_variable_name_{i}": xr.Variable("time", data, attrs=attrs)
                    for i in range(count)
                }

        elif strategy == "dict_of_Tuples":

            def create_data_vars():
                return {
                    f"long_variable_name_{i}": ("time", data, attrs)
                    for i in range(count)
                }

        else:
            # Fail with a clear message instead of an UnboundLocalError on
            # the assignment below.
            raise ValueError(
                f"Unknown strategy {strategy!r}; expected one of {self.params[0]}"
            )

        self.create_data_vars = create_data_vars

    def time_dataset_creation(self, strategy, count):
        """Create the data variables and construct a Dataset from them.

        The constructed Dataset is discarded.
        """
        data_vars = self.create_data_vars()
        xr.Dataset(
            data_vars=data_vars, coords=self.dataset_coords, attrs=self.dataset_attrs
        )
|