File: merge.py

package info (click to toggle)
python-xarray 2025.08.0-1
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 11,796 kB
  • sloc: python: 115,416; makefile: 258; sh: 47
file content (77 lines) | stat: -rw-r--r-- 2,443 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
import numpy as np

import xarray as xr


class DatasetAddVariable:
    """Benchmarks for inserting a variable into, and merging, datasets.

    ``existing_elements`` is the number of scalar variables each dataset
    holds before the timed operation runs.
    """

    param_names = ["existing_elements"]
    params = [[0, 10, 100, 1000]]

    def setup(self, existing_elements):
        """Build two datasets holding ``existing_elements`` scalars each."""
        self.datasets = {}
        # Fill plain dicts and merge once: dictionary insertion is
        # fast(er) than xarray.Dataset insertion.
        first = {f"var{idx}": idx for idx in range(existing_elements)}
        self.dataset = xr.merge([first])

        # Second dataset uses a disjoint set of variable names.
        second = {}
        for idx in range(existing_elements):
            second[f"set_2_{idx}"] = idx
        self.dataset2 = xr.merge([second])

    def time_variable_insertion(self, existing_elements):
        """Time assigning one scalar variable into the prepared dataset."""
        self.dataset["new_var"] = 0

    def time_merge_two_datasets(self, existing_elements):
        """Time merging the two prepared datasets."""
        pair = [self.dataset, self.dataset2]
        xr.merge(pair)


class DatasetCreation:
    """Benchmark building an ``xr.Dataset`` from numpy and Python data types.

    Parameterized by ``strategy`` (how each data variable is spelled: as a
    DataArray, a Variable, or a plain tuple) and ``count`` (how many data
    variables the dataset receives).
    """

    # The idea here is to time how long it takes to go from numpy
    # and python data types, to a full dataset
    # See discussion
    # https://github.com/pydata/xarray/issues/7224#issuecomment-1292216344
    param_names = ["strategy", "count"]
    params = [
        ["dict_of_DataArrays", "dict_of_Variables", "dict_of_Tuples"],
        [0, 1, 10, 100, 1000],
    ]

    def setup(self, strategy, count):
        """Prepare coords, attrs and a ``create_data_vars`` factory.

        The factory is stored on the instance and returns a fresh dict of
        ``count`` data variables, each spelled according to ``strategy``.

        Raises
        ------
        ValueError
            If ``strategy`` is not one of the supported strategy names.
        """
        data = np.array(["0", "b"], dtype=str)
        self.dataset_coords = dict(time=np.array([0, 1]))
        self.dataset_attrs = dict(description="Test data")
        attrs = dict(units="Celsius")
        if strategy == "dict_of_DataArrays":

            def create_data_vars():
                # ``dims`` is a single dimension name given as a string.
                return {
                    f"long_variable_name_{i}": xr.DataArray(
                        data=data, dims="time", attrs=attrs
                    )
                    for i in range(count)
                }

        elif strategy == "dict_of_Variables":

            def create_data_vars():
                return {
                    f"long_variable_name_{i}": xr.Variable("time", data, attrs=attrs)
                    for i in range(count)
                }

        elif strategy == "dict_of_Tuples":

            def create_data_vars():
                return {
                    f"long_variable_name_{i}": ("time", data, attrs)
                    for i in range(count)
                }

        else:
            # Without this branch ``create_data_vars`` would be unbound and
            # the assignment below would fail with an opaque
            # UnboundLocalError.
            raise ValueError(
                f"Unknown strategy {strategy!r}; expected one of {self.params[0]}"
            )

        self.create_data_vars = create_data_vars

    def time_dataset_creation(self, strategy, count):
        """Time creating the data variables and assembling the Dataset."""
        data_vars = self.create_data_vars()
        xr.Dataset(
            data_vars=data_vars, coords=self.dataset_coords, attrs=self.dataset_attrs
        )