File: scikit-learn-1.7.patch

package info (click to toggle)
sklearn-pandas 2.2.0-6
links: PTS, VCS
area: main
in suites: forky, sid
size: 460 kB
sloc: python: 1,213; sh: 12; makefile: 8
file content (135 lines) | stat: -rw-r--r-- 4,687 bytes
From: Aditya Mehra <adityamehra@mac.mynetworksettings.com>
Date: Fri, 17 Oct 2025 23:44:58 -0400
Subject: Fix ImportError with sklearn 1.7.0+ by replacing tosequence

- Remove deprecated sklearn.utils.tosequence import (removed in sklearn 1.7)
- Convert steps to list at start of __init__ to handle all input types
- Replace tosequence(steps) with direct list assignment
- Fixes compatibility with scikit-learn >= 1.7.0
- Maintains backward compatibility with older sklearn versions
- Add regression tests for list, tuple, and generator inputs
- Verify steps attribute is always a list type

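The tosequence utility was deprecated in sklearn 1.5 and removed in 1.7.
Using list() directly accepts the same inputs (lists, tuples, generators
and other iterables) without depending on sklearn internals. Unlike
tosequence, which returned an existing sequence unchanged, list() always
builds a new list, so the steps attribute is now a list even when a tuple
is passed in.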

Fixes #267

Origin: other, https://github.com/scikit-learn-contrib/sklearn-pandas/pull/269
Bug: https://github.com/scikit-learn-contrib/sklearn-pandas/issues/267
Bug-Debian: https://bugs.debian.org/1117989
Last-Update: 2025-10-19
---
 sklearn_pandas/pipeline.py |  7 +++--
 tests/test_pipeline.py     | 70 ++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 75 insertions(+), 2 deletions(-)

diff --git a/sklearn_pandas/pipeline.py b/sklearn_pandas/pipeline.py
index cde2fea..34c6043 100644
--- a/sklearn_pandas/pipeline.py
+++ b/sklearn_pandas/pipeline.py
@@ -1,5 +1,4 @@
 from sklearn.pipeline import _name_estimators, Pipeline
-from sklearn.utils import tosequence
 
 
 def _call_fit(fit_method, X, y=None, **kwargs):
@@ -35,13 +34,17 @@ class TransformerPipeline(Pipeline):
     """
 
     def __init__(self, steps):
+        # Materialize steps once: zip(), dict() and len() below would
+        # exhaust a generator (replaces removed sklearn.utils.tosequence)
+        steps = list(steps)
+
         names, estimators = zip(*steps)
         if len(dict(steps)) != len(steps):
             raise ValueError(
                 "Provided step names are not unique: %s" % (names,))
 
         # shallow copy of steps
-        self.steps = tosequence(steps)
+        self.steps = steps
         estimator = estimators[-1]
 
         for e in estimators:
diff --git a/tests/test_pipeline.py b/tests/test_pipeline.py
index ee57b57..dd300c6 100644
--- a/tests/test_pipeline.py
+++ b/tests/test_pipeline.py
@@ -98,3 +98,73 @@ def test_raises_type_error(mock_fit):
     """
     with pytest.raises(TypeError):
         _call_fit(Trans().fit, 'X', 'y', kwarg='kwarg')
+
+
+def test_transformer_pipeline_accepts_list():
+    """
+    Test that TransformerPipeline accepts list of steps.
+    Regression test for issue #267 (tosequence removal in sklearn 1.7.0)
+    """
+    steps = [
+        ('trans1', Trans()),
+        ('trans2', Trans())
+    ]
+
+    pipeline = TransformerPipeline(steps)
+    assert len(pipeline.steps) == 2
+    assert pipeline.steps[0][0] == 'trans1'
+    assert pipeline.steps[1][0] == 'trans2'
+    assert isinstance(pipeline.steps, list)
+
+
+def test_transformer_pipeline_accepts_tuple():
+    """
+    Test that TransformerPipeline accepts tuple of steps.
+    Regression test for issue #267 (tosequence removal in sklearn 1.7.0)
+    """
+    steps = (
+        ('trans1', Trans()),
+        ('trans2', Trans())
+    )
+
+    pipeline = TransformerPipeline(steps)
+    assert len(pipeline.steps) == 2
+    assert pipeline.steps[0][0] == 'trans1'
+    assert pipeline.steps[1][0] == 'trans2'
+    assert isinstance(pipeline.steps, list)
+
+
+def test_transformer_pipeline_accepts_generator():
+    """
+    Test that TransformerPipeline accepts generator of steps.
+    Regression test for issue #267 (tosequence removal in sklearn 1.7.0)
+    """
+
+    def step_generator():
+        yield ('trans1', Trans())
+        yield ('trans2', Trans())
+
+    pipeline = TransformerPipeline(step_generator())
+    assert len(pipeline.steps) == 2
+    assert pipeline.steps[0][0] == 'trans1'
+    assert pipeline.steps[1][0] == 'trans2'
+    assert isinstance(pipeline.steps, list)
+
+
+def test_transformer_pipeline_steps_is_list():
+    """
+    Test that steps attribute is always a list after initialization.
+    This ensures list() conversion works correctly (replacing tosequence).
+    Regression test for issue #267 (tosequence removal in sklearn 1.7.0)
+    """
+    # Test with list input
+    pipeline1 = TransformerPipeline([('trans', Trans())])
+    assert isinstance(pipeline1.steps, list)
+
+    # Test with tuple input
+    pipeline2 = TransformerPipeline((('trans', Trans()),))
+    assert isinstance(pipeline2.steps, list)
+
+    # Test with iterator input
+    pipeline3 = TransformerPipeline(iter([('trans', Trans())]))
+    assert isinstance(pipeline3.steps, list)
\ No newline at end of file