File: combine.py

package info (click to toggle)
python-xarray 2025.08.0-1
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 11,796 kB
  • sloc: python: 115,416; makefile: 258; sh: 47
file content (79 lines) | stat: -rw-r--r-- 2,417 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
import numpy as np

import xarray as xr

from . import requires_dask


class Combine1d:
    """Benchmark combining two 1-D datasets that are adjacent along ``T``."""

    def setup(self) -> None:
        """Create two datasets holding variable ``A`` on consecutive ``T`` ranges.

        Both datasets are built from the same random array; only their ``T``
        coordinate differs (``0..t_size-1`` and ``t_size..2*t_size-1``).
        """

        t_size = 8000
        t = np.arange(t_size)
        data = np.random.randn(t_size)

        # One-element tuple: the trailing comma matters, ("T") is a plain str.
        dims = ("T",)

        self.dsA0 = xr.Dataset({"A": xr.DataArray(data, coords={"T": t}, dims=dims)})
        self.dsA1 = xr.Dataset(
            {"A": xr.DataArray(data, coords={"T": t + t_size}, dims=dims)}
        )

    def time_combine_by_coords(self) -> None:
        """Time ``xr.combine_by_coords`` on the two datasets from ``setup``.

        Also has to load and arrange the ``T`` coordinate.
        """
        datasets = [self.dsA0, self.dsA1]

        xr.combine_by_coords(datasets)


class Combine1dDask(Combine1d):
    """Benchmark combining two wide 1-D datasets whose variables are chunked.

    Only ``setup`` is overridden here; the timed method is the one inherited
    from ``Combine1d``.
    """

    def setup(self) -> None:
        """Create two 500-variable datasets on consecutive ``T`` ranges."""
        # Called before anything is allocated.
        # NOTE(review): presumably skips the benchmark when dask is missing;
        # confirm against the helper in the package ``__init__``.
        requires_dask()

        n_times = 8000
        first_t = np.arange(n_times)
        second_t = first_t + n_times

        # A single chunked Variable is reused as the data of every variable.
        chunked = xr.Variable(dims=("T",), data=np.random.randn(n_times)).chunk()

        names = [f"long_name_{v}" for v in range(500)]
        data_vars = {name: ("T", chunked) for name in names}

        # The same mapping feeds both datasets; only the coordinate differs.
        self.dsA0 = xr.Dataset(data_vars, coords={"T": first_t})
        self.dsA1 = xr.Dataset(data_vars, coords={"T": second_t})


class Combine3d:
    """Benchmark combining 3-D datasets along ``T`` and across variables."""

    def setup(self) -> None:
        """Create four datasets: variables ``A`` and ``B`` on two ``T`` ranges each.

        All four datasets are built from the same random ``(T, X, Y)`` array;
        they differ only in variable name and in their ``T`` coordinate.
        """

        t_size, x_size, y_size = 50, 450, 400
        t = np.arange(t_size)
        data = np.random.randn(t_size, x_size, y_size)

        def _dataset(name, t_values):
            # Single-variable dataset over the shared array with the given T.
            return xr.Dataset(
                {name: xr.DataArray(data, coords={"T": t_values}, dims=("T", "X", "Y"))}
            )

        self.dsA0 = _dataset("A", t)
        self.dsA1 = _dataset("A", t + t_size)
        self.dsB0 = _dataset("B", t)
        self.dsB1 = _dataset("B", t + t_size)

    def time_combine_nested(self) -> None:
        """Time ``xr.combine_nested`` on a 2x2 grid of the datasets.

        The outer list separates the ``A`` and ``B`` datasets
        (``concat_dim`` entry ``None``); each inner list holds the two
        ``T`` ranges of one variable (``concat_dim`` entry ``"T"``).
        """
        datasets = [[self.dsA0, self.dsA1], [self.dsB0, self.dsB1]]

        xr.combine_nested(datasets, concat_dim=[None, "T"])

    def time_combine_by_coords(self) -> None:
        """Time ``xr.combine_by_coords`` on the flat list of all four datasets.

        Also has to load and arrange the ``T`` coordinate.
        """
        datasets = [self.dsA0, self.dsA1, self.dsB0, self.dsB1]

        xr.combine_by_coords(datasets)