From: Colin Watson <cjwatson@debian.org>
Date: Sun, 19 Oct 2025 02:17:51 +0100
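Subject: Handle estimator checking changes in scikit-learn 1.6

List the scikit-learn mixins before BaseSymbolic in the class bases,
validate prediction/transform input with validate_data() when it can be
imported (falling back to check_array() otherwise), reject non-binary
classification targets explicitly in fit(), and declare the binary-only
and expected-failure information through both the older (_more_tags) and
newer (__sklearn_tags__, expected_failed_checks) interfaces.

This raises the minimum supported scikit-learn version from 1.0.2 to
1.1.0 and adds a runtime dependency on "packaging" for the version check
in gplearn/utils.py.
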
Forwarded: https://github.com/trevorstephens/gplearn/pull/305
Bug-Debian: https://bugs.debian.org/1117991
Last-Update: 2025-10-19
---
 .github/workflows/build.yml            |  2 +-
 doc/rtd-pip-requirements               |  2 +-
 gplearn/genetic.py                     | 71 +++++++++++++++++++++++++++-------
 gplearn/tests/test_estimator_checks.py | 11 +++++-
 gplearn/utils.py                       |  8 ++++
 setup.py                               |  3 +-
 6 files changed, 80 insertions(+), 17 deletions(-)

diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 3b198b8..5df8e5b 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -41,7 +41,7 @@ jobs:
         python -m pip install pandas
     - name: Install minimal dependencies
       if: ${{ matrix.python_version == '3.8' }}
-      run: python -m pip install scikit-learn==1.0.2 joblib==1.0.0
+      run: python -m pip install scikit-learn==1.1.0 joblib==1.0.0
     - name: Install gplearn
       run: python -m pip install .
     - name: Describe Python environment
diff --git a/doc/rtd-pip-requirements b/doc/rtd-pip-requirements
index 947fdee..fbf725e 100644
--- a/doc/rtd-pip-requirements
+++ b/doc/rtd-pip-requirements
@@ -1,5 +1,5 @@
 numpy>=1.8.1
 numpydoc>=0.5
 scipy>=0.13
-scikit-learn>=0.22.1
+scikit-learn>=1.1.0
 joblib>=0.13.0
diff --git a/gplearn/genetic.py b/gplearn/genetic.py
index 157bc92..d64dedb 100644
--- a/gplearn/genetic.py
+++ b/gplearn/genetic.py
@@ -23,11 +23,13 @@ from sklearn.exceptions import NotFittedError
 from sklearn.utils import compute_sample_weight
 from sklearn.utils.validation import check_array, _check_sample_weight
 from sklearn.utils.multiclass import check_classification_targets
+from sklearn.utils.multiclass import type_of_target
 
 from ._program import _Program
 from .fitness import _fitness_map, _Fitness
 from .functions import _function_map, _Function, sig1 as sigmoid
 from .utils import _partition_estimators
+from .utils import _sklearn_version_ge
 from .utils import check_random_state
 
 __all__ = ['SymbolicRegressor', 'SymbolicClassifier', 'SymbolicTransformer']
@@ -301,6 +303,18 @@ class BaseSymbolic(BaseEstimator, metaclass=ABCMeta):
         if isinstance(self, ClassifierMixin):
             X, y = self._validate_data(X, y, y_numeric=False)
             check_classification_targets(y)
+            # Once we require scikit-learn >= 1.6, this should pass
+            # raise_unknown=True rather than checking for "unknown"
+            # manually.
+            y_type = type_of_target(y, input_name="y")
+            if y_type == "unknown":
+                raise ValueError("Unknown label type for y: %r" % y)
+            elif y_type != "binary":
+                raise ValueError(
+                    "Only binary classification is supported. The type of the "
+                    "target is %s."
+                    % y_type
+                )
 
             if self.class_weight:
                 if sample_weight is None:
@@ -599,7 +613,7 @@ class BaseSymbolic(BaseEstimator, metaclass=ABCMeta):
         return self
 
 
-class SymbolicRegressor(BaseSymbolic, RegressorMixin):
+class SymbolicRegressor(RegressorMixin, BaseSymbolic):
 
     """A Genetic Programming symbolic regressor.
 
@@ -868,7 +882,15 @@ class SymbolicRegressor(BaseSymbolic, RegressorMixin):
         if not hasattr(self, '_program'):
             raise NotFittedError('SymbolicRegressor not fitted.')
 
-        X = check_array(X)
+        try:
+            # scikit-learn >= 1.6
+            from sklearn.utils.validation import validate_data
+
+            X = validate_data(self, X, reset=False)
+        except ImportError:
+            # scikit-learn < 1.6
+            X = check_array(X)
+
         _, n_features = X.shape
         if self.n_features_in_ != n_features:
             raise ValueError('Number of features of the model must match the '
@@ -881,7 +903,7 @@ class SymbolicRegressor(BaseSymbolic, RegressorMixin):
         return y
 
 
-class SymbolicClassifier(BaseSymbolic, ClassifierMixin):
+class SymbolicClassifier(ClassifierMixin, BaseSymbolic):
 
     """A Genetic Programming symbolic classifier.
 
@@ -1142,6 +1164,11 @@ class SymbolicClassifier(BaseSymbolic, ClassifierMixin):
             return self.__repr__()
         return self._program.__str__()
 
+    def __sklearn_tags__(self):
+        tags = super().__sklearn_tags__()
+        tags.classifier_tags.multi_class = False
+        return tags
+
     def _more_tags(self):
         return {'binary_only': True}
 
@@ -1164,7 +1191,15 @@ class SymbolicClassifier(BaseSymbolic, ClassifierMixin):
         if not hasattr(self, '_program'):
             raise NotFittedError('SymbolicClassifier not fitted.')
 
-        X = check_array(X)
+        try:
+            # scikit-learn >= 1.6
+            from sklearn.utils.validation import validate_data
+
+            X = validate_data(self, X, reset=False)
+        except ImportError:
+            # scikit-learn < 1.6
+            X = check_array(X)
+
         _, n_features = X.shape
         if self.n_features_in_ != n_features:
             raise ValueError('Number of features of the model must match the '
@@ -1196,7 +1231,7 @@ class SymbolicClassifier(BaseSymbolic, ClassifierMixin):
         return self.classes_.take(np.argmax(proba, axis=1), axis=0)
 
 
-class SymbolicTransformer(BaseSymbolic, TransformerMixin):
+class SymbolicTransformer(TransformerMixin, BaseSymbolic):
 
     """A Genetic Programming symbolic transformer.
 
@@ -1467,14 +1502,16 @@ class SymbolicTransformer(BaseSymbolic, TransformerMixin):
         output = str([gp.__str__() for gp in self])
         return output.replace("',", ",\n").replace("'", "")
 
-    def _more_tags(self):
-        return {
-            "_xfail_checks": {
-                "check_sample_weights_invariance": (
-                    "zero sample_weight is not equivalent to removing samples"
-                ),
+    if not _sklearn_version_ge("1.6"):
+        def _more_tags(self):
+            return {
+                "_xfail_checks": {
+                    "check_sample_weights_invariance": (
+                        "zero sample_weight is not equivalent to removing "
+                        "samples"
+                    ),
+                }
             }
-        }
 
     def transform(self, X):
         """Transform X according to the fitted transformer.
@@ -1494,7 +1531,15 @@ class SymbolicTransformer(BaseSymbolic, TransformerMixin):
         if not hasattr(self, '_best_programs'):
             raise NotFittedError('SymbolicTransformer not fitted.')
 
-        X = check_array(X)
+        try:
+            # scikit-learn >= 1.6
+            from sklearn.utils.validation import validate_data
+
+            X = validate_data(self, X, reset=False)
+        except ImportError:
+            # scikit-learn < 1.6
+            X = check_array(X)
+
         _, n_features = X.shape
         if self.n_features_in_ != n_features:
             raise ValueError('Number of features of the model must match the '
diff --git a/gplearn/tests/test_estimator_checks.py b/gplearn/tests/test_estimator_checks.py
index af57fb3..534b50e 100644
--- a/gplearn/tests/test_estimator_checks.py
+++ b/gplearn/tests/test_estimator_checks.py
@@ -10,6 +10,7 @@ from sklearn.utils.estimator_checks import check_estimator
 
 from gplearn.genetic import SymbolicClassifier, SymbolicRegressor
 from gplearn.genetic import SymbolicTransformer
+from gplearn.utils import _sklearn_version_ge
 
 
 def test_sklearn_regressor_checks():
@@ -29,6 +30,14 @@ def test_sklearn_classifier_checks():
 def test_sklearn_transformer_checks():
     """Run the sklearn estimator validation checks on SymbolicTransformer"""
 
+    kwargs = {}
+    if _sklearn_version_ge("1.6"):
+        kwargs["expected_failed_checks"] = {
+            "check_sample_weights_invariance": (
+                "zero sample_weight is not equivalent to removing samples"
+            ),
+        }
     check_estimator(SymbolicTransformer(population_size=50,
                                         hall_of_fame=10,
-                                        generations=5))
+                                        generations=5),
+                    **kwargs)
diff --git a/gplearn/utils.py b/gplearn/utils.py
index 7eee1bd..a210dec 100644
--- a/gplearn/utils.py
+++ b/gplearn/utils.py
@@ -6,10 +6,18 @@ order to maintain compatibility across different versions of scikit-learn.
 
 """
 
+import importlib.metadata
 import numbers
 
 import numpy as np
 from joblib import cpu_count
+from packaging.version import Version
+
+
+def _sklearn_version_ge(min_version):
+    """Check whether we have at least min_version of scikit-learn."""
+    sklearn_version = importlib.metadata.version("scikit-learn")
+    return Version(sklearn_version) >= Version(min_version)
 
 
 def check_random_state(seed):
diff --git a/setup.py b/setup.py
index 1eef653..72b13fd 100644
--- a/setup.py
+++ b/setup.py
@@ -34,5 +34,6 @@ setup(name='gplearn',
                                       '*.tests.*']),
       zip_safe=False,
       package_data={'': ['LICENSE']},
-      install_requires=['scikit-learn>=1.0.2',
+      install_requires=['packaging',
+                        'scikit-learn>=1.1.0',
                         'joblib>=1.0.0'])
