File: test_dataframe.py

package info (click to toggle)
scikit-learn 1.8.0%2Bdfsg-3
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 26,132 kB
  • sloc: python: 224,867; cpp: 5,790; ansic: 846; makefile: 190; javascript: 179
file content (84 lines) | stat: -rw-r--r-- 2,590 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
"""Tests for dataframe detection functions."""

import numpy as np
import pytest

from sklearn._min_dependencies import dependent_packages
from sklearn.utils._dataframe import is_df_or_series, is_pandas_df, is_polars_df
from sklearn.utils._testing import _convert_container


@pytest.mark.parametrize("constructor_name", ["pyarrow", "dataframe", "polars"])
def test_is_df_or_series(constructor_name):
    """Check is_df_or_series on a converted container and on a plain ndarray."""
    raw_rows = [[1, 4, 2], [3, 3, 6]]
    container = _convert_container(raw_rows, constructor_name)

    # Whatever `_convert_container` builds for the parametrized constructor
    # name must be accepted by the detection helper.
    assert is_df_or_series(container)

    # A bare NumPy array must not be accepted.
    plain_array = np.asarray([1, 2, 3])
    assert not is_df_or_series(plain_array)


@pytest.mark.parametrize("constructor_name", ["pyarrow", "dataframe", "polars"])
def test_is_pandas_df_other_libraries(constructor_name):
    """Check is_pandas_df across containers built by each constructor name."""
    container = _convert_container([[1, 4, 2], [3, 3, 6]], constructor_name)

    # Only the container built for a name other than "pyarrow"/"polars"
    # (i.e. "dataframe") is expected to be accepted by is_pandas_df.
    expect_pandas = constructor_name not in ("pyarrow", "polars")
    if expect_pandas:
        assert is_pandas_df(container)
    else:
        assert not is_pandas_df(container)


def test_is_pandas_df():
    """Check is_pandas_df on a pandas DataFrame, an ndarray and an integer.

    The test is skipped when pandas cannot be imported.
    """
    pandas = pytest.importorskip("pandas")

    frame = pandas.DataFrame([[1, 2, 3]])
    assert is_pandas_df(frame)

    # Neither a NumPy array nor a plain integer may be reported as a
    # pandas dataframe.
    for not_a_frame in (np.asarray([1, 2, 3]), 1):
        assert not is_pandas_df(not_a_frame)


def test_is_pandas_df_pandas_not_installed(hide_available_pandas):
    """Check is_pandas_df on non-dataframe inputs when pandas is not installed."""
    # NOTE(review): `hide_available_pandas` is a fixture defined outside this
    # file; presumably it makes pandas look uninstalled -- confirm in conftest.
    ndarray_input = np.asarray([1, 2, 3])
    scalar_input = 1

    for candidate in (ndarray_input, scalar_input):
        assert not is_pandas_df(candidate)


@pytest.mark.parametrize(
    "constructor_name, minversion",
    [
        ("pyarrow", dependent_packages["pyarrow"][0]),
        ("dataframe", dependent_packages["pandas"][0]),
        ("polars", dependent_packages["polars"][0]),
    ],
)
def test_is_polars_df_other_libraries(constructor_name, minversion):
    """Check is_polars_df across containers built by each constructor name."""
    rows = [[1, 4, 2], [3, 3, 6]]
    # `minversion` is forwarded untouched to `_convert_container`.
    # NOTE(review): presumably it gates the test on the installed library
    # version -- confirm against `_convert_container`.
    container = _convert_container(rows, constructor_name, minversion=minversion)

    # Only the container built for a name other than "pyarrow"/"dataframe"
    # (i.e. "polars") is expected to be accepted by is_polars_df.
    expect_polars = constructor_name not in ("pyarrow", "dataframe")
    if expect_polars:
        assert is_polars_df(container)
    else:
        assert not is_polars_df(container)


def test_is_polars_df_for_duck_typed_polars_dataframe():
    """Check is_polars_df rejects an object that only looks like a polars frame."""

    class NotAPolarsDataFrame:
        # Plain object exposing `columns` and `schema` instance attributes.
        def __init__(self):
            self.columns, self.schema = [1, 2, 3], "my_schema"

    # Having the two attributes alone must not be enough to pass detection.
    assert not is_polars_df(NotAPolarsDataFrame())


def test_is_polars_df():
    """Check that is_polars_df returns a falsy result for a non-dataframe object."""

    class LooksLikePolars:
        # Stand-in object whose `columns` and `schema` are two separate lists.
        def __init__(self):
            self.columns, self.schema = ["a", "b"], ["a", "b"]

    lookalike = LooksLikePolars()
    assert not is_polars_df(lookalike)