File: pandas.py

package info (click to toggle)
python-xarray 2025.08.0-1
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 11,796 kB
  • sloc: python: 115,416; makefile: 258; sh: 47
file content (64 lines) | stat: -rw-r--r-- 1,763 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
import numpy as np
import pandas as pd

import xarray as xr

from . import parameterized, requires_dask


class MultiIndexSeries:
    """Benchmark ``xr.DataArray.from_series`` on a three-level MultiIndex."""

    def setup(self, dtype, subset):
        """Prepare ``self.series`` for the timing method.

        The index is the product of two 10-letter levels and 1000 daily
        timestamps starting 2000-01-01, i.e. 100000 entries, matching the
        number of random values drawn.

        Parameters
        ----------
        dtype
            Target type the random values are cast to with ``astype``.
        subset
            When truthy, only every third row of the series is kept.
        """
        letters = list("abcdefhijk")
        days = pd.date_range(start="2000-01-01", periods=1000, freq="D")

        values = np.random.rand(100000).astype(dtype)
        multi_index = pd.MultiIndex.from_product([letters, letters, days])
        full_series = pd.Series(values, multi_index)

        self.series = full_series[::3] if subset else full_series

    @parameterized(["dtype", "subset"], ([int, float], [True, False]))
    def time_from_series(self, dtype, subset):
        """Convert the prepared series to a DataArray.

        ``dtype`` and ``subset`` are not read here; the series stored by
        ``setup`` is used as-is.
        """
        xr.DataArray.from_series(self.series)


class ToDataFrame:
    """Benchmark a Dataset conversion method on a 10_000 x 10_000 dataset."""

    def setup(self, *args, **kwargs):
        """Build the dataset and store the bound method to benchmark.

        Positional arguments are accepted but not used. Recognised keyword
        arguments:

        xp
            Array module providing ``random.random`` (default: numpy).
        nvars
            Number of data variables to create (default: 1).
        random_kws
            Extra keyword arguments forwarded to ``xp.random.random``
            (default: none).
        method
            Name of the Dataset attribute stored as ``self.to_frame``
            (default: ``"to_dataframe"``).
        """
        array_module = kwargs.get("xp", np)
        n_variables = kwargs.get("nvars", 1)
        random_kwargs = kwargs.get("random_kws", {})
        method_name = kwargs.get("method", "to_dataframe")

        dim_names = ("dim1", "dim2")
        dim_sizes = (10_000, 10_000)

        # One Variable is created and shared by every data variable.
        values = array_module.random.random(dim_sizes, **random_kwargs)
        shared_var = xr.Variable(dims=dim_names, data=values)

        data_vars = {}
        for i in range(n_variables):
            data_vars[f"long_name_{i}"] = (("dim1", "dim2"), shared_var)

        coords = {
            name: np.arange(0, size) for name, size in zip(dim_names, dim_sizes)
        }
        dataset = xr.Dataset(data_vars, coords=coords)

        self.to_frame = getattr(dataset, method_name)

    def time_to_dataframe(self):
        """Call the conversion method stored by ``setup``."""
        self.to_frame()

    def peakmem_to_dataframe(self):
        """Call the conversion method stored by ``setup``."""
        self.to_frame()


class ToDataFrameDask(ToDataFrame):
    """Variant of ``ToDataFrame`` using dask arrays and ``to_dask_dataframe``."""

    def setup(self, *args, **kwargs):
        """Configure the parent setup for dask.

        Incoming arguments are ignored; the parent is always called with a
        fixed configuration: dask's array module, ``chunks=5000`` for the
        random data, 500 data variables, and the ``to_dask_dataframe``
        method.
        """
        # Checked before the import below.
        # NOTE(review): presumably this skips the benchmark when dask is
        # not installed -- confirm against the helper's definition.
        requires_dask()

        import dask.array as dask_array

        options = {
            "xp": dask_array,
            "random_kws": {"chunks": 5000},
            "method": "to_dask_dataframe",
            "nvars": 500,
        }
        super().setup(**options)