Description: PR 11020 - bugfix from upstream.
 Dataset.map now merges attrs from the function result and the original
 object using the drop_conflicts strategy when keep_attrs=True, preserving
 attrs set by the mapped function (upstream issue 11019, PR 11020).
Bug-Debian: https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=1123151
Forwarded: not-needed
Last-Update: 2026-01-06
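
The following is an illustrative sketch (not part of the upstream patch) of the
behaviour introduced here; it mirrors the regression test added to
xarray/tests/test_dataset.py in this patch:

    import xarray as xr

    ds = xr.Dataset({"test": ("x", [1, 2, 3], {"original": "value"})})

    def add_attr(da):
        # the mapped function sets an attribute of its own on the result
        return da.assign_attrs(new_attr="foobar")

    # keep_attrs=True: attrs from the function result and the original variable
    # are merged with the drop_conflicts strategy, so both keys survive
    result = ds.map(add_attr, keep_attrs=True)
    assert result["test"].attrs == {"original": "value", "new_attr": "foobar"}

    # keep_attrs=False: attrs are left exactly as the function returned them
    result = ds.map(add_attr, keep_attrs=False)
    assert result["test"].attrs == {"original": "value", "new_attr": "foobar"}
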
--- a/doc/whats-new.rst
+++ b/doc/whats-new.rst
@@ -28,6 +28,10 @@
Bug Fixes
~~~~~~~~~
+- :py:meth:`Dataset.map` now merges attrs from the function result and the original
+ using the ``drop_conflicts`` strategy when ``keep_attrs=True``, preserving attrs
+ set by the function (:issue:`11019`, :pull:`11020`).
+ By `Maximilian Roos <https://github.com/max-sixty>`_.
- When assigning an indexed coordinate to a data variable or coordinate, coerce it from
``IndexVariable`` to ``Variable`` (:issue:`9859`, :issue:`10829`, :pull:`10909`).
By `Julia Signell <https://github.com/jsignell>`_.
--- a/pixi.toml
+++ b/pixi.toml
@@ -143,6 +143,12 @@
toolz = "0.12.*"
zarr = "2.18.*"
+# TODO: Remove `platforms` restriction once pandas nightly has win-64 wheels again.
+# Without this, `pixi lock` fails because it can't solve the nightly feature for win-64,
+# which breaks RTD builds (RTD has no lock file cache, unlike GitHub Actions CI).
+[feature.nightly]
+platforms = ["linux-64", "osx-arm64"]
+
[feature.nightly.dependencies]
python = "*"
--- a/xarray/computation/weighted.py
+++ b/xarray/computation/weighted.py
@@ -544,14 +544,28 @@
dataset = self.obj._to_temp_dataset()
dataset = dataset.map(func, dim=dim, **kwargs)
- return self.obj._from_temp_dataset(dataset)
+ result = self.obj._from_temp_dataset(dataset)
+ # Clear attrs when keep_attrs is explicitly False
+ # (weighted operations can propagate attrs from weights through internal computations)
+ if kwargs.get("keep_attrs") is False:
+ result.attrs = {}
+
+ return result
class DatasetWeighted(Weighted["Dataset"]):
def _implementation(self, func, dim, **kwargs) -> Dataset:
self._check_dim(dim)
- return self.obj.map(func, dim=dim, **kwargs)
+ result = self.obj.map(func, dim=dim, **kwargs)
+
+ # Clear attrs when keep_attrs is explicitly False
+ # (weighted operations can propagate attrs from weights through internal computations)
+ if kwargs.get("keep_attrs") is False:
+ result.attrs = {}
+ for var in result.data_vars.values():
+ var.attrs = {}
+ return result
def _inject_docstring(cls, cls_name):
cls.sum_of_weights.__doc__ = _SUM_OF_WEIGHTS_DOCSTRING.format(cls=cls_name)
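
Illustrative sketch (not part of the patch; the dataset and weights below are
hypothetical) of why the weighted classes now clear attrs themselves: with
Dataset.map no longer dropping attrs returned by the mapped function, an
explicit keep_attrs=False must be honoured here:

    import xarray as xr

    ds = xr.Dataset({"t": ("x", [1.0, 2.0, 3.0], {"units": "K"})})
    weights = xr.DataArray([0.2, 0.3, 0.5], dims="x", attrs={"note": "weights"})

    # With an explicit keep_attrs=False the result carries no attrs, even though
    # attrs from the weights can propagate through the internal computations
    out = ds.weighted(weights).mean(dim="x", keep_attrs=False)
    assert out["t"].attrs == {}
    assert out.attrs == {}
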
--- a/xarray/core/dataset.py
+++ b/xarray/core/dataset.py
@@ -6910,8 +6910,11 @@
DataArray.
keep_attrs : bool or None, optional
If True, both the dataset's and variables' attributes (`attrs`) will be
- copied from the original objects to the new ones. If False, the new dataset
- and variables will be returned without copying the attributes.
+ combined from the original objects and the function results using the
+ ``drop_conflicts`` strategy: matching attrs are kept, conflicting attrs
+ are dropped. If False, the new dataset and variables will have only
+ the attributes set by the function.
+
args : iterable, optional
Positional arguments passed on to `func`.
**kwargs : Any
@@ -6960,16 +6963,19 @@
coords = Coordinates._construct_direct(coords=coord_vars, indexes=indexes)
if keep_attrs:
+ # Merge attrs from function result and original, dropping conflicts
+ from xarray.structure.merge import merge_attrs
+
for k, v in variables.items():
- v._copy_attrs_from(self.data_vars[k])
+ v.attrs = merge_attrs(
+ [v.attrs, self.data_vars[k].attrs], "drop_conflicts"
+ )
for k, v in coords.items():
if k in self.coords:
- v._copy_attrs_from(self.coords[k])
- else:
- for v in variables.values():
- v.attrs = {}
- for v in coords.values():
- v.attrs = {}
+ v.attrs = merge_attrs(
+ [v.attrs, self.coords[k].attrs], "drop_conflicts"
+ )
+ # When keep_attrs=False, leave attrs as the function returned them
attrs = self.attrs if keep_attrs else None
return type(self)(variables, coords=coords, attrs=attrs)
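
For reference, a minimal sketch (not part of the patch) of what the
drop_conflicts strategy does; merge_attrs is the internal helper imported in
the hunk above, and its location may change between xarray versions:

    from xarray.structure.merge import merge_attrs

    # keys with matching values are kept, conflicting keys are dropped,
    # and keys present in only one dict survive
    merged = merge_attrs([{"a": 1, "b": 2}, {"a": 1, "b": 3, "c": 4}], "drop_conflicts")
    assert merged == {"a": 1, "c": 4}
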
--- a/xarray/core/datatree.py
+++ b/xarray/core/datatree.py
@@ -397,8 +397,10 @@
DataArray.
keep_attrs : bool | None, optional
If True, both the dataset's and variables' attributes (`attrs`) will be
- copied from the original objects to the new ones. If False, the new dataset
- and variables will be returned without copying the attributes.
+ combined from the original objects and the function results using the
+ ``drop_conflicts`` strategy: matching attrs are kept, conflicting attrs
+ are dropped. If False, the new dataset and variables will have only
+ the attributes set by the function.
args : iterable, optional
Positional arguments passed on to `func`.
**kwargs : Any
@@ -438,8 +440,13 @@
for k, v in self.data_vars.items()
}
if keep_attrs:
+ # Merge attrs from function result and original, dropping conflicts
+ from xarray.structure.merge import merge_attrs
+
for k, v in variables.items():
- v._copy_attrs_from(self.data_vars[k])
+ v.attrs = merge_attrs(
+ [v.attrs, self.data_vars[k].attrs], "drop_conflicts"
+ )
attrs = self.attrs if keep_attrs else None
# return type(self)(variables, attrs=attrs)
return Dataset(variables, attrs=attrs)
--- a/xarray/tests/test_dataset.py
+++ b/xarray/tests/test_dataset.py
@@ -6452,6 +6452,36 @@
expected = xr.Dataset({"foo": 42, "bar": ("y", [4, 5])})
assert_identical(result, expected)
+ def test_map_preserves_function_attrs(self) -> None:
+ # Regression test for GH11019
+ # Attrs added by function should be preserved in result
+ ds = xr.Dataset({"test": ("x", [1, 2, 3], {"original": "value"})})
+
+ def add_attr(da):
+ return da.assign_attrs(new_attr="foobar")
+
+ # With keep_attrs=True: merge using drop_conflicts (no conflict here)
+ result = ds.map(add_attr, keep_attrs=True)
+ assert result["test"].attrs == {"original": "value", "new_attr": "foobar"}
+
+ # With keep_attrs=False: function's attrs preserved
+ result = ds.map(add_attr, keep_attrs=False)
+ assert result["test"].attrs == {"original": "value", "new_attr": "foobar"}
+
+ # When function modifies existing attr with keep_attrs=True, conflict is dropped
+ def modify_attr(da):
+ return da.assign_attrs(original="modified", extra="added")
+
+ result = ds.map(modify_attr, keep_attrs=True)
+ assert result["test"].attrs == {
+ "extra": "added"
+ } # "original" dropped due to conflict
+
+ # When function modifies existing attr with keep_attrs=False, function wins
+ result = ds.map(modify_attr, keep_attrs=False)
+ assert result["test"].attrs == {"original": "modified", "extra": "added"}
+
+
def test_apply_pending_deprecated_map(self) -> None:
data = create_test_data()
data.attrs["foo"] = "bar"