Description: PR 11020 - bugfix from upstream.
 Dataset.map now merges attrs from the function result and the original
 object using the drop_conflicts strategy when keep_attrs=True, preserving
 attrs set by the mapped function (upstream issue 11019, PR 11020).
Bug-Debian: https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=1123151
Forwarded: not-needed
Last-Update: 2026-01-06
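
The following is an illustrative sketch (not part of the upstream patch) of the
behaviour introduced here; it mirrors the regression test added to
xarray/tests/test_dataset.py in this patch:

    import xarray as xr

    ds = xr.Dataset({"test": ("x", [1, 2, 3], {"original": "value"})})

    def add_attr(da):
        # the mapped function sets an attribute of its own on the result
        return da.assign_attrs(new_attr="foobar")

    # keep_attrs=True: attrs from the function result and the original variable
    # are merged with the drop_conflicts strategy, so both keys survive
    result = ds.map(add_attr, keep_attrs=True)
    assert result["test"].attrs == {"original": "value", "new_attr": "foobar"}

    # keep_attrs=False: attrs are left exactly as the function returned them
    result = ds.map(add_attr, keep_attrs=False)
    assert result["test"].attrs == {"original": "value", "new_attr": "foobar"}
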
--- a/doc/whats-new.rst
+++ b/doc/whats-new.rst
@@ -28,6 +28,10 @@
Bug Fixes
~~~~~~~~~
+- :py:meth:`Dataset.map` now merges attrs from the function result and the original
+ using the ``drop_conflicts`` strategy when ``keep_attrs=True``, preserving attrs
+ set by the function (:issue:`11019`, :pull:`11020`).
+ By `Maximilian Roos <https://github.com/max-sixty>`_.
- When assigning an indexed coordinate to a data variable or coordinate, coerce it from
``IndexVariable`` to ``Variable`` (:issue:`9859`, :issue:`10829`, :pull:`10909`).
By `Julia Signell <https://github.com/jsignell>`_.
--- a/pixi.toml
+++ b/pixi.toml
@@ -143,6 +143,12 @@
toolz = "0.12.*"
zarr = "2.18.*"
+# TODO: Remove `platforms` restriction once pandas nightly has win-64 wheels again.
+# Without this, `pixi lock` fails because it can't solve the nightly feature for win-64,
+# which breaks RTD builds (RTD has no lock file cache, unlike GitHub Actions CI).
+[feature.nightly]
+platforms = ["linux-64", "osx-arm64"]
+
[feature.nightly.dependencies]
python = "*"
--- a/xarray/computation/weighted.py
+++ b/xarray/computation/weighted.py
@@ -544,14 +544,28 @@
dataset = self.obj._to_temp_dataset()
dataset = dataset.map(func, dim=dim, **kwargs)
- return self.obj._from_temp_dataset(dataset)
+ result = self.obj._from_temp_dataset(dataset)
+ # Clear attrs when keep_attrs is explicitly False
+ # (weighted operations can propagate attrs from weights through internal computations)
+ if kwargs.get("keep_attrs") is False:
+ result.attrs = {}
+
+ return result
class DatasetWeighted(Weighted["Dataset"]):
def _implementation(self, func, dim, **kwargs) -> Dataset:
self._check_dim(dim)
- return self.obj.map(func, dim=dim, **kwargs)
+ result = self.obj.map(func, dim=dim, **kwargs)
+
+ # Clear attrs when keep_attrs is explicitly False
+ # (weighted operations can propagate attrs from weights through internal computations)
+ if kwargs.get("keep_attrs") is False:
+ result.attrs = {}
+ for var in result.data_vars.values():
+ var.attrs = {}
+ return result
def _inject_docstring(cls, cls_name):
cls.sum_of_weights.__doc__ = _SUM_OF_WEIGHTS_DOCSTRING.format(cls=cls_name)
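
Illustrative sketch (not part of the patch; the dataset and weights below are
hypothetical) of why the weighted classes now clear attrs themselves: with
Dataset.map no longer dropping attrs returned by the mapped function, an
explicit keep_attrs=False must be honoured here:

    import xarray as xr

    ds = xr.Dataset({"t": ("x", [1.0, 2.0, 3.0], {"units": "K"})})
    weights = xr.DataArray([0.2, 0.3, 0.5], dims="x", attrs={"note": "weights"})

    # With an explicit keep_attrs=False the result carries no attrs, even though
    # attrs from the weights can propagate through the internal computations
    out = ds.weighted(weights).mean(dim="x", keep_attrs=False)
    assert out["t"].attrs == {}
    assert out.attrs == {}
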
--- a/xarray/core/dataset.py
+++ b/xarray/core/dataset.py
@@ -6910,8 +6910,11 @@
DataArray.
keep_attrs : bool or None, optional
If True, both the dataset's and variables' attributes (`attrs`) will be
- copied from the original objects to the new ones. If False, the new dataset
- and variables will be returned without copying the attributes.
+ combined from the original objects and the function results using the
+ ``drop_conflicts`` strategy: matching attrs are kept, conflicting attrs
+ are dropped. If False, the new dataset and variables will have only
+ the attributes set by the function.
+
args : iterable, optional
Positional arguments passed on to `func`.
**kwargs : Any
@@ -6960,16 +6963,19 @@
coords = Coordinates._construct_direct(coords=coord_vars, indexes=indexes)
if keep_attrs:
+ # Merge attrs from function result and original, dropping conflicts
+ from xarray.structure.merge import merge_attrs
+
for k, v in variables.items():
- v._copy_attrs_from(self.data_vars[k])
+ v.attrs = merge_attrs(
+ [v.attrs, self.data_vars[k].attrs], "drop_conflicts"
+ )
for k, v in coords.items():
if k in self.coords:
- v._copy_attrs_from(self.coords[k])
- else:
- for v in variables.values():
- v.attrs = {}
- for v in coords.values():
- v.attrs = {}
+ v.attrs = merge_attrs(
+ [v.attrs, self.coords[k].attrs], "drop_conflicts"
+ )
+ # When keep_attrs=False, leave attrs as the function returned them
attrs = self.attrs if keep_attrs else None
return type(self)(variables, coords=coords, attrs=attrs)
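
For reference, a minimal sketch (not part of the patch) of what the
drop_conflicts strategy does; merge_attrs is the internal helper imported in
the hunk above, and its location may change between xarray versions:

    from xarray.structure.merge import merge_attrs

    # keys with matching values are kept, conflicting keys are dropped,
    # and keys present in only one dict survive
    merged = merge_attrs([{"a": 1, "b": 2}, {"a": 1, "b": 3, "c": 4}], "drop_conflicts")
    assert merged == {"a": 1, "c": 4}
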
--- a/xarray/core/datatree.py
+++ b/xarray/core/datatree.py
@@ -397,8 +397,10 @@
DataArray.
keep_attrs : bool | None, optional
If True, both the dataset's and variables' attributes (`attrs`) will be
- copied from the original objects to the new ones. If False, the new dataset
- and variables will be returned without copying the attributes.
+ combined from the original objects and the function results using the
+ ``drop_conflicts`` strategy: matching attrs are kept, conflicting attrs
+ are dropped. If False, the new dataset and variables will have only
+ the attributes set by the function.
args : iterable, optional
Positional arguments passed on to `func`.
**kwargs : Any
@@ -438,8 +440,13 @@
for k, v in self.data_vars.items()
}
if keep_attrs:
+ # Merge attrs from function result and original, dropping conflicts
+ from xarray.structure.merge import merge_attrs
+
for k, v in variables.items():
- v._copy_attrs_from(self.data_vars[k])
+ v.attrs = merge_attrs(
+ [v.attrs, self.data_vars[k].attrs], "drop_conflicts"
+ )
attrs = self.attrs if keep_attrs else None
# return type(self)(variables, attrs=attrs)
return Dataset(variables, attrs=attrs)
--- a/xarray/tests/test_dataset.py
+++ b/xarray/tests/test_dataset.py
@@ -6452,6 +6452,36 @@
expected = xr.Dataset({"foo": 42, "bar": ("y", [4, 5])})
assert_identical(result, expected)
+ def test_map_preserves_function_attrs(self) -> None:
+ # Regression test for GH11019
+ # Attrs added by function should be preserved in result
+ ds = xr.Dataset({"test": ("x", [1, 2, 3], {"original": "value"})})
+
+ def add_attr(da):
+ return da.assign_attrs(new_attr="foobar")
+
+ # With keep_attrs=True: merge using drop_conflicts (no conflict here)
+ result = ds.map(add_attr, keep_attrs=True)
+ assert result["test"].attrs == {"original": "value", "new_attr": "foobar"}
+
+ # With keep_attrs=False: function's attrs preserved
+ result = ds.map(add_attr, keep_attrs=False)
+ assert result["test"].attrs == {"original": "value", "new_attr": "foobar"}
+
+ # When function modifies existing attr with keep_attrs=True, conflict is dropped
+ def modify_attr(da):
+ return da.assign_attrs(original="modified", extra="added")
+
+ result = ds.map(modify_attr, keep_attrs=True)
+ assert result["test"].attrs == {
+ "extra": "added"
+ } # "original" dropped due to conflict
+
+ # When function modifies existing attr with keep_attrs=False, function wins
+ result = ds.map(modify_attr, keep_attrs=False)
+ assert result["test"].attrs == {"original": "modified", "extra": "added"}
+
+
def test_apply_pending_deprecated_map(self) -> None:
data = create_test_data()
data.attrs["foo"] = "bar"