File: test_clip.py

package info (click to toggle)
pandas 2.3.1%2Bdfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: experimental
  • size: 66,800 kB
  • sloc: python: 424,812; ansic: 9,190; sh: 264; xml: 102; makefile: 86
file content (199 lines) | stat: -rw-r--r-- 7,554 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
import numpy as np
import pytest

from pandas import (
    DataFrame,
    Series,
)
import pandas._testing as tm


class TestDataFrameClip:
    def test_clip(self, float_frame):
        median = float_frame.median().median()
        original = float_frame.copy()

        double = float_frame.clip(upper=median, lower=median)
        assert not (double.values != median).any()

        # Verify that float_frame was not changed inplace
        assert (float_frame.values == original.values).all()

    def test_inplace_clip(self, float_frame):
        # GH#15388
        median = float_frame.median().median()
        frame_copy = float_frame.copy()

        return_value = frame_copy.clip(upper=median, lower=median, inplace=True)
        assert return_value is None
        assert not (frame_copy.values != median).any()

    def test_dataframe_clip(self):
        # GH#2747
        df = DataFrame(np.random.default_rng(2).standard_normal((1000, 2)))

        for lb, ub in [(-1, 1), (1, -1)]:
            clipped_df = df.clip(lb, ub)

            lb, ub = min(lb, ub), max(ub, lb)
            lb_mask = df.values <= lb
            ub_mask = df.values >= ub
            mask = ~lb_mask & ~ub_mask
            assert (clipped_df.values[lb_mask] == lb).all()
            assert (clipped_df.values[ub_mask] == ub).all()
            assert (clipped_df.values[mask] == df.values[mask]).all()

    def test_clip_mixed_numeric(self):
        # clip on mixed integer or floats
        # GH#24162, clipping now preserves numeric types per column
        df = DataFrame({"A": [1, 2, 3], "B": [1.0, np.nan, 3.0]})
        result = df.clip(1, 2)
        expected = DataFrame({"A": [1, 2, 2], "B": [1.0, np.nan, 2.0]})
        tm.assert_frame_equal(result, expected)

        df = DataFrame([[1, 2, 3.4], [3, 4, 5.6]], columns=["foo", "bar", "baz"])
        expected = df.dtypes
        result = df.clip(upper=3).dtypes
        tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize("inplace", [True, False])
    def test_clip_against_series(self, inplace):
        # GH#6966

        df = DataFrame(np.random.default_rng(2).standard_normal((1000, 2)))
        lb = Series(np.random.default_rng(2).standard_normal(1000))
        ub = lb + 1

        original = df.copy()
        clipped_df = df.clip(lb, ub, axis=0, inplace=inplace)

        if inplace:
            clipped_df = df

        for i in range(2):
            lb_mask = original.iloc[:, i] <= lb
            ub_mask = original.iloc[:, i] >= ub
            mask = ~lb_mask & ~ub_mask

            result = clipped_df.loc[lb_mask, i]
            tm.assert_series_equal(result, lb[lb_mask], check_names=False)
            assert result.name == i

            result = clipped_df.loc[ub_mask, i]
            tm.assert_series_equal(result, ub[ub_mask], check_names=False)
            assert result.name == i

            tm.assert_series_equal(clipped_df.loc[mask, i], df.loc[mask, i])

    @pytest.mark.parametrize("inplace", [True, False])
    @pytest.mark.parametrize("lower", [[2, 3, 4], np.asarray([2, 3, 4])])
    @pytest.mark.parametrize(
        "axis,res",
        [
            (0, [[2.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 7.0, 7.0]]),
            (1, [[2.0, 3.0, 4.0], [4.0, 5.0, 6.0], [5.0, 6.0, 7.0]]),
        ],
    )
    def test_clip_against_list_like(self, inplace, lower, axis, res):
        # GH#15390
        arr = np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]])

        original = DataFrame(
            arr, columns=["one", "two", "three"], index=["a", "b", "c"]
        )

        result = original.clip(lower=lower, upper=[5, 6, 7], axis=axis, inplace=inplace)

        expected = DataFrame(res, columns=original.columns, index=original.index)
        if inplace:
            result = original
        tm.assert_frame_equal(result, expected, check_exact=True)

    @pytest.mark.parametrize("axis", [0, 1, None])
    def test_clip_against_frame(self, axis):
        df = DataFrame(np.random.default_rng(2).standard_normal((1000, 2)))
        lb = DataFrame(np.random.default_rng(2).standard_normal((1000, 2)))
        ub = lb + 1

        clipped_df = df.clip(lb, ub, axis=axis)

        lb_mask = df <= lb
        ub_mask = df >= ub
        mask = ~lb_mask & ~ub_mask

        tm.assert_frame_equal(clipped_df[lb_mask], lb[lb_mask])
        tm.assert_frame_equal(clipped_df[ub_mask], ub[ub_mask])
        tm.assert_frame_equal(clipped_df[mask], df[mask])

    def test_clip_against_unordered_columns(self):
        # GH#20911
        df1 = DataFrame(
            np.random.default_rng(2).standard_normal((1000, 4)),
            columns=["A", "B", "C", "D"],
        )
        df2 = DataFrame(
            np.random.default_rng(2).standard_normal((1000, 4)),
            columns=["D", "A", "B", "C"],
        )
        df3 = DataFrame(df2.values - 1, columns=["B", "D", "C", "A"])
        result_upper = df1.clip(lower=0, upper=df2)
        expected_upper = df1.clip(lower=0, upper=df2[df1.columns])
        result_lower = df1.clip(lower=df3, upper=3)
        expected_lower = df1.clip(lower=df3[df1.columns], upper=3)
        result_lower_upper = df1.clip(lower=df3, upper=df2)
        expected_lower_upper = df1.clip(lower=df3[df1.columns], upper=df2[df1.columns])
        tm.assert_frame_equal(result_upper, expected_upper)
        tm.assert_frame_equal(result_lower, expected_lower)
        tm.assert_frame_equal(result_lower_upper, expected_lower_upper)

    def test_clip_with_na_args(self, float_frame):
        """Should process np.nan argument as None"""
        # GH#17276
        tm.assert_frame_equal(float_frame.clip(np.nan), float_frame)
        tm.assert_frame_equal(float_frame.clip(upper=np.nan, lower=np.nan), float_frame)

        # GH#19992 and adjusted in GH#40420
        df = DataFrame({"col_0": [1, 2, 3], "col_1": [4, 5, 6], "col_2": [7, 8, 9]})

        msg = "Downcasting behavior in Series and DataFrame methods 'where'"
        # TODO: avoid this warning here?  seems like we should never be upcasting
        #  in the first place?
        with tm.assert_produces_warning(FutureWarning, match=msg):
            result = df.clip(lower=[4, 5, np.nan], axis=0)
        expected = DataFrame(
            {"col_0": [4, 5, 3], "col_1": [4, 5, 6], "col_2": [7, 8, 9]}
        )
        tm.assert_frame_equal(result, expected)

        result = df.clip(lower=[4, 5, np.nan], axis=1)
        expected = DataFrame(
            {"col_0": [4, 4, 4], "col_1": [5, 5, 6], "col_2": [7, 8, 9]}
        )
        tm.assert_frame_equal(result, expected)

        # GH#40420
        data = {"col_0": [9, -3, 0, -1, 5], "col_1": [-2, -7, 6, 8, -5]}
        df = DataFrame(data)
        t = Series([2, -4, np.nan, 6, 3])
        with tm.assert_produces_warning(FutureWarning, match=msg):
            result = df.clip(lower=t, axis=0)
        expected = DataFrame({"col_0": [9, -3, 0, 6, 5], "col_1": [2, -4, 6, 8, 3]})
        tm.assert_frame_equal(result, expected)

    def test_clip_int_data_with_float_bound(self):
        # GH51472
        df = DataFrame({"a": [1, 2, 3]})
        result = df.clip(lower=1.5)
        expected = DataFrame({"a": [1.5, 2.0, 3.0]})
        tm.assert_frame_equal(result, expected)

    def test_clip_with_list_bound(self):
        # GH#54817
        df = DataFrame([1, 5])
        expected = DataFrame([3, 5])
        result = df.clip([3])
        tm.assert_frame_equal(result, expected)

        expected = DataFrame([1, 3])
        result = df.clip(upper=[3])
        tm.assert_frame_equal(result, expected)