1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266
|
from __future__ import annotations
from collections.abc import Callable, Iterable, Sequence
from typing import TYPE_CHECKING, Any
import numpy as np
from xarray.core.indexing import ImplicitToExplicitIndexingAdapter
from xarray.namedarray.parallelcompat import ChunkManagerEntrypoint, T_ChunkedArray
from xarray.namedarray.utils import is_duck_dask_array, module_available
if TYPE_CHECKING:
from xarray.namedarray._typing import (
T_Chunks,
_DType_co,
_NormalizedChunks,
duckarray,
)
# Probe for dask once at import time; ``DaskManager.available`` reuses the result.
dask_available = module_available("dask")

try:
    from dask.array import Array as DaskArray
except ImportError:
    # dask could not be imported: bind the name to a NumPy array type instead
    # so that ``DaskArray`` still resolves at module level.
    DaskArray = np.ndarray[Any, Any]
class DaskManager(ChunkManagerEntrypoint["DaskArray"]):
    """Chunk manager entrypoint whose operations are delegated to dask.

    Every dask import happens inside the method that needs it, so defining
    this class does not itself import dask.
    """

    array_cls: type[DaskArray]
    # Result of ``module_available("dask")`` evaluated at module import time.
    available: bool = dask_available

    def __init__(self) -> None:
        """Record ``dask.array.Array`` as the array class handled by this manager."""
        # TODO can we replace this with a class attribute instead?
        import dask.array as da

        self.array_cls = da.Array

    def is_chunked_array(self, data: duckarray[Any, Any]) -> bool:
        """Return whether ``is_duck_dask_array`` recognises ``data``."""
        return is_duck_dask_array(data)

    def chunks(self, data: Any) -> _NormalizedChunks:
        """Return the ``chunks`` attribute of ``data``."""
        return data.chunks  # type: ignore[no-any-return]

    def normalize_chunks(
        self,
        chunks: T_Chunks | _NormalizedChunks,
        shape: tuple[int, ...] | None = None,
        limit: int | None = None,
        dtype: _DType_co | None = None,
        previous_chunks: _NormalizedChunks | None = None,
    ) -> Any:
        """Called by open_dataset.

        Forwards every argument unchanged to
        ``dask.array.core.normalize_chunks`` and returns its result.
        """
        from dask.array.core import normalize_chunks as dask_normalize_chunks

        return dask_normalize_chunks(
            chunks,
            shape=shape,
            limit=limit,
            dtype=dtype,
            previous_chunks=previous_chunks,
        )  # type: ignore[no-untyped-call]

    def from_array(
        self, data: Any, chunks: T_Chunks | _NormalizedChunks, **kwargs: Any
    ) -> DaskArray | Any:
        """Wrap ``data`` with ``dask.array.from_array`` using ``chunks``.

        When ``data`` is an ``ImplicitToExplicitIndexingAdapter`` the ``meta``
        keyword is set to ``np.ndarray`` (replacing any caller-supplied value)
        before the call. All other keyword arguments are passed through.
        """
        import dask.array as dask_array

        wraps_lazy_backend = isinstance(data, ImplicitToExplicitIndexingAdapter)
        if wraps_lazy_backend:
            # lazily loaded backend array classes should use NumPy array operations.
            kwargs.update(meta=np.ndarray)

        return dask_array.from_array(
            data,
            chunks,
            **kwargs,
        )  # type: ignore[no-untyped-call]

    def compute(
        self, *data: Any, **kwargs: Any
    ) -> tuple[np.ndarray[Any, _DType_co], ...]:
        """Forward all arguments to ``dask.array.compute`` and return its result."""
        from dask.array import compute as dask_compute

        return dask_compute(*data, **kwargs)  # type: ignore[no-untyped-call, no-any-return]

    def persist(self, *data: Any, **kwargs: Any) -> tuple[DaskArray | Any, ...]:
        """Forward all arguments to ``dask.persist`` and return its result."""
        from dask import persist as dask_persist

        return dask_persist(*data, **kwargs)  # type: ignore[no-untyped-call, no-any-return]

    @property
    def array_api(self) -> Any:
        """The ``dask.array`` module."""
        import dask.array

        return dask.array

    def reduction(
        self,
        arr: T_ChunkedArray,
        func: Callable[..., Any],
        combine_func: Callable[..., Any] | None = None,
        aggregate_func: Callable[..., Any] | None = None,
        axis: int | Sequence[int] | None = None,
        dtype: _DType_co | None = None,
        keepdims: bool = False,
    ) -> DaskArray | Any:
        """Delegate to ``dask.array.reduction``.

        ``func``, ``combine_func`` and ``aggregate_func`` are handed over as
        dask's ``chunk``, ``combine`` and ``aggregate`` keywords respectively;
        ``axis``, ``dtype`` and ``keepdims`` keep their names.
        """
        from dask.array import reduction as dask_reduction

        return dask_reduction(
            arr,
            chunk=func,
            combine=combine_func,
            aggregate=aggregate_func,
            axis=axis,
            dtype=dtype,
            keepdims=keepdims,
        )  # type: ignore[no-untyped-call]

    def scan(
        self,
        func: Callable[..., Any],
        binop: Callable[..., Any],
        ident: float,
        arr: T_ChunkedArray,
        axis: int | None = None,
        dtype: _DType_co | None = None,
        **kwargs: Any,
    ) -> DaskArray | Any:
        """Delegate to ``dask.array.reductions.cumreduction``.

        ``func``, ``binop``, ``ident`` and ``arr`` are passed positionally in
        that order; ``axis``, ``dtype`` and any extra keywords go by name.
        """
        from dask.array.reductions import cumreduction as dask_cumreduction

        return dask_cumreduction(
            func,
            binop,
            ident,
            arr,
            axis=axis,
            dtype=dtype,
            **kwargs,
        )  # type: ignore[no-untyped-call]

    def apply_gufunc(
        self,
        func: Callable[..., Any],
        signature: str,
        *args: Any,
        axes: Sequence[tuple[int, ...]] | None = None,
        axis: int | None = None,
        keepdims: bool = False,
        output_dtypes: Sequence[_DType_co] | None = None,
        output_sizes: dict[str, int] | None = None,
        vectorize: bool | None = None,
        allow_rechunk: bool = False,
        meta: tuple[np.ndarray[Any, _DType_co], ...] | None = None,
        **kwargs: Any,
    ) -> Any:
        """Delegate to ``dask.array.gufunc.apply_gufunc`` with identical arguments."""
        from dask.array.gufunc import apply_gufunc as dask_apply_gufunc

        return dask_apply_gufunc(
            func,
            signature,
            *args,
            axes=axes,
            axis=axis,
            keepdims=keepdims,
            output_dtypes=output_dtypes,
            output_sizes=output_sizes,
            vectorize=vectorize,
            allow_rechunk=allow_rechunk,
            meta=meta,
            **kwargs,
        )  # type: ignore[no-untyped-call]

    def map_blocks(
        self,
        func: Callable[..., Any],
        *args: Any,
        dtype: _DType_co | None = None,
        chunks: tuple[int, ...] | None = None,
        drop_axis: int | Sequence[int] | None = None,
        new_axis: int | Sequence[int] | None = None,
        **kwargs: Any,
    ) -> Any:
        """Delegate to ``dask.array.map_blocks`` with identical arguments."""
        from dask.array import map_blocks as dask_map_blocks

        # pass through name, meta, token as kwargs
        return dask_map_blocks(
            func,
            *args,
            dtype=dtype,
            chunks=chunks,
            drop_axis=drop_axis,
            new_axis=new_axis,
            **kwargs,
        )  # type: ignore[no-untyped-call]

    def blockwise(
        self,
        func: Callable[..., Any],
        out_ind: Iterable[Any],
        *args: Any,
        # can't type this as mypy assumes args are all same type, but dask blockwise args alternate types
        name: str | None = None,
        token: Any | None = None,
        dtype: _DType_co | None = None,
        adjust_chunks: dict[Any, Callable[..., Any]] | None = None,
        new_axes: dict[Any, int] | None = None,
        align_arrays: bool = True,
        concatenate: bool | None = None,
        meta: tuple[np.ndarray[Any, _DType_co], ...] | None = None,
        **kwargs: Any,
    ) -> DaskArray | Any:
        """Delegate to ``dask.array.blockwise`` with identical arguments."""
        from dask.array import blockwise as dask_blockwise

        return dask_blockwise(
            func,
            out_ind,
            *args,
            name=name,
            token=token,
            dtype=dtype,
            adjust_chunks=adjust_chunks,
            new_axes=new_axes,
            align_arrays=align_arrays,
            concatenate=concatenate,
            meta=meta,
            **kwargs,
        )  # type: ignore[no-untyped-call]

    def unify_chunks(
        self,
        *args: Any,  # can't type this as mypy assumes args are all same type, but dask unify_chunks args alternate types
        **kwargs: Any,
    ) -> tuple[dict[str, _NormalizedChunks], list[DaskArray]]:
        """Forward all arguments to ``dask.array.core.unify_chunks``."""
        from dask.array.core import unify_chunks as dask_unify_chunks

        return dask_unify_chunks(*args, **kwargs)  # type: ignore[no-any-return, no-untyped-call]

    def store(
        self,
        sources: Any | Sequence[Any],
        targets: Any,
        **kwargs: Any,
    ) -> Any:
        """Delegate to ``dask.array.store``, passing ``sources`` and ``targets`` by keyword."""
        from dask.array import store as dask_store

        return dask_store(
            sources=sources,
            targets=targets,
            **kwargs,
        )

    def shuffle(
        self, x: DaskArray, indexer: list[list[int]], axis: int, chunks: T_Chunks
    ) -> DaskArray:
        """Delegate to ``dask.array.shuffle``, always with ``chunks="auto"``.

        Raises
        ------
        ValueError
            If ``module_available`` reports that dask is older than 2024.08.1.
        NotImplementedError
            If ``chunks`` is neither ``None`` nor ``"auto"``.
        """
        import dask.array

        if not module_available("dask", minversion="2024.08.1"):
            raise ValueError(
                "This method is very inefficient on dask<2024.08.1. Please upgrade."
            )

        # ``None`` is accepted as a synonym for "auto"; anything else is rejected.
        if chunks is not None and chunks != "auto":
            raise NotImplementedError("Only chunks='auto' is supported at present.")

        return dask.array.shuffle(x, indexer, axis, chunks="auto")
|