File: test_bar.py

package info (click to toggle)
pandas 2.3.3%2Bdfsg-3
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 67,184 kB
  • sloc: python: 425,585; ansic: 9,219; sh: 264; xml: 102; makefile: 85
file content (360 lines) | stat: -rw-r--r-- 12,103 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
import io

import numpy as np
import pytest

import pandas.util._test_decorators as td
from pandas import (
    NA,
    DataFrame,
    read_csv,
)

# NOTE(review): presumably skips this module when jinja2 cannot be imported
# (possibly with a version check) — confirm against
# pandas.util._test_decorators.versioned_importorskip.
td.versioned_importorskip("jinja2")


def bar_grad(a=None, b=None, c=None, d=None):
    """Build the expected CSS ``(property, value)`` pairs for one cell.

    The result always starts with ``("width", "10em")``.  When at least one
    of ``a``..``d`` is not None, a ``("background", ...)`` pair is added whose
    value is a 90deg ``linear-gradient`` made of the truthy arguments, in
    order, joined by commas.
    """
    stops = (a, b, c, d)
    css = [("width", "10em")]
    if any(stop is not None for stop in stops):
        # Only truthy stops make it into the gradient text.
        gradient = ",".join(stop for stop in stops if stop)
        css.append(("background", f"linear-gradient(90deg,{gradient})"))
    return css


def no_bar():
    """Return the result of ``bar_grad`` called without any gradient stops."""
    styles = bar_grad()
    return styles


def bar_to(x, color="#d65f5f"):
    """Return ``bar_grad`` styles with two stops at ``x`` percent.

    The first stop uses ``color`` and the second is transparent; ``x`` is
    formatted with one decimal place.
    """
    position = f"{x:.1f}%"
    return bar_grad(f" {color} {position}", f" transparent {position}")


def bar_from_to(x, y, color="#d65f5f"):
    """Return ``bar_grad`` styles with four stops: two at ``x`` and two at ``y``.

    The stops are transparent/``color`` at ``x`` percent, then
    ``color``/transparent at ``y`` percent, each formatted with one decimal.
    """
    start = f"{x:.1f}%"
    end = f"{y:.1f}%"
    stops = [
        f" transparent {start}",
        f" {color} {start}",
        f" {color} {end}",
        f" transparent {end}",
    ]
    return bar_grad(*stops)


@pytest.fixture
def df_pos():
    """Single-column DataFrame holding the values 1, 2, 3."""
    rows = [[1], [2], [3]]
    return DataFrame(rows)


@pytest.fixture
def df_neg():
    """Single-column DataFrame holding the values -1, -2, -3."""
    rows = [[-1], [-2], [-3]]
    return DataFrame(rows)


@pytest.fixture
def df_mix():
    """Single-column DataFrame holding the values -3, 1, 2."""
    rows = [[-3], [1], [2]]
    return DataFrame(rows)


@pytest.mark.parametrize(
    "align, exp",
    [
        ("left", [no_bar(), bar_to(50), bar_to(100)]),
        ("right", [bar_to(100), bar_from_to(50, 100), no_bar()]),
        ("mid", [bar_to(33.33), bar_to(66.66), bar_to(100)]),
        ("zero", [bar_from_to(50, 66.7), bar_from_to(50, 83.3), bar_from_to(50, 100)]),
        ("mean", [bar_to(50), no_bar(), bar_from_to(50, 100)]),
        (2.0, [bar_to(50), no_bar(), bar_from_to(50, 100)]),
        (np.median, [bar_to(50), no_bar(), bar_from_to(50, 100)]),
    ],
)
def test_align_positive_cases(df_pos, align, exp):
    """Check the computed bar styles for each ``align`` on all-positive data."""
    styler = df_pos.style.bar(align=align)
    result = styler._compute().ctx
    # One expected style list per row of the single data column.
    expected = {(row, 0): styles for row, styles in enumerate(exp)}
    assert result == expected


@pytest.mark.parametrize(
    "align, exp",
    [
        ("left", [bar_to(100), bar_to(50), no_bar()]),
        ("right", [no_bar(), bar_from_to(50, 100), bar_to(100)]),
        ("mid", [bar_from_to(66.66, 100), bar_from_to(33.33, 100), bar_to(100)]),
        ("zero", [bar_from_to(33.33, 50), bar_from_to(16.66, 50), bar_to(50)]),
        ("mean", [bar_from_to(50, 100), no_bar(), bar_to(50)]),
        (-2.0, [bar_from_to(50, 100), no_bar(), bar_to(50)]),
        (np.median, [bar_from_to(50, 100), no_bar(), bar_to(50)]),
    ],
)
def test_align_negative_cases(df_neg, align, exp):
    """Check the computed bar styles for each ``align`` on all-negative data."""
    styler = df_neg.style.bar(align=align)
    result = styler._compute().ctx
    # One expected style list per row of the single data column.
    expected = {(row, 0): styles for row, styles in enumerate(exp)}
    assert result == expected


@pytest.mark.parametrize(
    "align, exp",
    [
        ("left", [no_bar(), bar_to(80), bar_to(100)]),
        ("right", [bar_to(100), bar_from_to(80, 100), no_bar()]),
        ("mid", [bar_to(60), bar_from_to(60, 80), bar_from_to(60, 100)]),
        ("zero", [bar_to(50), bar_from_to(50, 66.66), bar_from_to(50, 83.33)]),
        ("mean", [bar_to(50), bar_from_to(50, 66.66), bar_from_to(50, 83.33)]),
        (-0.0, [bar_to(50), bar_from_to(50, 66.66), bar_from_to(50, 83.33)]),
        (np.nanmedian, [bar_to(50), no_bar(), bar_from_to(50, 62.5)]),
    ],
)
@pytest.mark.parametrize("nans", [True, False])
def test_align_mixed_cases(df_mix, align, exp, nans):
    """Check bar styles for each ``align`` on mixed-sign data, with and without NaN.

    When ``nans`` is true an all-NaN row is appended; it is expected to get the
    ``no_bar()`` styles while the other rows keep the same expectations.
    """
    expected = {(row, 0): styles for row, styles in enumerate(exp)}
    if nans:
        # The fixture frame is extended in place with a NaN row at label 3.
        df_mix.loc[3, :] = np.nan
        expected[(3, 0)] = no_bar()
    styler = df_mix.style.bar(align=align)
    result = styler._compute().ctx
    assert result == expected


@pytest.mark.parametrize(
    "align, exp",
    [
        (
            "left",
            {
                "index": [[no_bar(), no_bar()], [bar_to(100), bar_to(100)]],
                "columns": [[no_bar(), bar_to(100)], [no_bar(), bar_to(100)]],
                "none": [[no_bar(), bar_to(33.33)], [bar_to(66.66), bar_to(100)]],
            },
        ),
        (
            "mid",
            {
                "index": [[bar_to(33.33), bar_to(50)], [bar_to(100), bar_to(100)]],
                "columns": [[bar_to(50), bar_to(100)], [bar_to(75), bar_to(100)]],
                "none": [[bar_to(25), bar_to(50)], [bar_to(75), bar_to(100)]],
            },
        ),
        (
            "zero",
            {
                "index": [
                    [bar_from_to(50, 66.66), bar_from_to(50, 75)],
                    [bar_from_to(50, 100), bar_from_to(50, 100)],
                ],
                "columns": [
                    [bar_from_to(50, 75), bar_from_to(50, 100)],
                    [bar_from_to(50, 87.5), bar_from_to(50, 100)],
                ],
                "none": [
                    [bar_from_to(50, 62.5), bar_from_to(50, 75)],
                    [bar_from_to(50, 87.5), bar_from_to(50, 100)],
                ],
            },
        ),
        (
            2,
            {
                "index": [
                    [bar_to(50), no_bar()],
                    [bar_from_to(50, 100), bar_from_to(50, 100)],
                ],
                "columns": [
                    [bar_to(50), no_bar()],
                    [bar_from_to(50, 75), bar_from_to(50, 100)],
                ],
                "none": [
                    [bar_from_to(25, 50), no_bar()],
                    [bar_from_to(50, 75), bar_from_to(50, 100)],
                ],
            },
        ),
    ],
)
@pytest.mark.parametrize("axis", ["index", "columns", "none"])
def test_align_axis(align, exp, axis):
    """Check bar styles for every ``axis`` option combined with several aligns.

    ``exp`` maps the axis label to a 2x2 grid of expected styles; the label
    ``"none"`` stands for ``axis=None`` in the ``Styler.bar`` call.
    """
    frame = DataFrame([[1, 2], [3, 4]])
    bar_axis = None if axis == "none" else axis
    styler = frame.style.bar(align=align, axis=bar_axis)
    result = styler._compute().ctx

    grid = exp[axis]
    expected = {(row, col): grid[row][col] for row in range(2) for col in range(2)}
    assert result == expected


@pytest.mark.parametrize(
    "values, vmin, vmax",
    [
        ("positive", 1.5, 2.5),
        ("negative", -2.5, -1.5),
        ("mixed", -2.5, 1.5),
    ],
)
@pytest.mark.parametrize("nullify", [None, "vmin", "vmax"])  # test min/max separately
@pytest.mark.parametrize("align", ["left", "right", "zero", "mid"])
def test_vmin_vmax_clipping(df_pos, df_neg, df_mix, values, vmin, vmax, nullify, align):
    """Check that ``vmin``/``vmax`` give the same styles as pre-clipped data.

    The frame selected by ``values`` is styled with the given bounds and the
    result is compared with styling a copy whose values were clipped to those
    bounds beforehand.  ``nullify`` names the bound (if any) that is replaced
    by None so each bound is also exercised on its own.
    """
    # test that clipping occurs if any vmin > data_values or vmax < data_values
    if align == "mid":  # mid acts as left or right in each case
        if values == "positive":
            align = "left"
        elif values == "negative":
            align = "right"
    df = {"positive": df_pos, "negative": df_neg, "mixed": df_mix}[values]
    vmin = None if nullify == "vmin" else vmin
    vmax = None if nullify == "vmax" else vmax

    # Compare against None explicitly: a truthiness check would treat a bound
    # of 0 as "not given" and skip the clipping of the expected frame.
    upper = 999 if vmax is None else vmax
    lower = -999 if vmin is None else vmin
    clip_df = df.where(df <= upper, other=vmax)
    clip_df = clip_df.where(clip_df >= lower, other=vmin)

    result = (
        df.style.bar(align=align, vmin=vmin, vmax=vmax, color=["red", "green"])
        ._compute()
        .ctx
    )
    expected = clip_df.style.bar(align=align, color=["red", "green"])._compute().ctx
    assert result == expected


@pytest.mark.parametrize(
    "values, vmin, vmax",
    [
        ("positive", 0.5, 4.5),
        ("negative", -4.5, -0.5),
        ("mixed", -4.5, 4.5),
    ],
)
@pytest.mark.parametrize("nullify", [None, "vmin", "vmax"])  # test min/max separately
@pytest.mark.parametrize("align", ["left", "right", "zero", "mid"])
def test_vmin_vmax_widening(df_pos, df_neg, df_mix, values, vmin, vmax, nullify, align):
    """Check that ``vmin``/``vmax`` beyond the data act like extra data rows.

    Styling the frame with the bounds must give, for every original cell, the
    same styles as styling a copy that has the bounds appended as rows 3 and 4.
    ``nullify`` names the bound (if any) that is replaced by None.
    """
    # test that widening occurs if any vmax > data_values or vmin < data_values
    if align == "mid":
        # mid acts as left or right in each single-sign case
        align = {"positive": "left", "negative": "right"}.get(values, align)

    frames = {"positive": df_pos, "negative": df_neg, "mixed": df_mix}
    df = frames[values]
    if nullify == "vmin":
        vmin = None
    if nullify == "vmax":
        vmax = None

    expand_df = df.copy()
    expand_df.loc[3, :] = vmin
    expand_df.loc[4, :] = vmax

    colors = ["red", "green"]
    styled = df.style.bar(align=align, vmin=vmin, vmax=vmax, color=colors)
    result = styled._compute().ctx
    expected = expand_df.style.bar(align=align, color=colors)._compute().ctx
    # The widened frame has extra rows, so only a subset relation is required.
    assert result.items() <= expected.items()


def test_numerics():
    # test data is pre-selected for numeric values
    data = DataFrame([[1, "a"], [2, "b"]])
    result = data.style.bar()._compute().ctx
    assert (0, 1) not in result
    assert (1, 1) not in result


@pytest.mark.parametrize(
    "align, exp",
    [
        ("left", [no_bar(), bar_to(100, "green")]),
        ("right", [bar_to(100, "red"), no_bar()]),
        ("mid", [bar_to(25, "red"), bar_from_to(25, 100, "green")]),
        ("zero", [bar_from_to(33.33, 50, "red"), bar_from_to(50, 100, "green")]),
    ],
)
def test_colors_mixed(align, exp):
    """Check the styles produced by a two-colour ``color`` list on mixed-sign data."""
    frame = DataFrame([[-1], [3]])
    styler = frame.style.bar(align=align, color=["red", "green"])
    result = styler._compute().ctx
    expected = {(row, 0): styles for row, styles in enumerate(exp)}
    assert result == expected


def test_bar_align_height():
    # test when keyword height is used 'no-repeat center' and 'background-size' present
    data = DataFrame([[1], [2]])
    result = data.style.bar(align="left", height=50)._compute().ctx
    bg_s = "linear-gradient(90deg, #d65f5f 100.0%, transparent 100.0%) no-repeat center"
    expected = {
        (0, 0): [("width", "10em")],
        (1, 0): [
            ("width", "10em"),
            ("background", bg_s),
            ("background-size", "100% 50.0%"),
        ],
    }
    assert result == expected


def test_bar_value_error_raises():
    """Check that bad ``align``, ``width`` and ``height`` each raise ValueError."""
    df = DataFrame({"A": [-100, -60, -30, -20]})

    # (keyword arguments for Styler.bar, regex the error message must match)
    bad_calls = [
        (
            {"align": "poorly", "color": ["#d65f5f", "#5fba7d"]},
            "`align` should be in {'left', 'right', 'mid', 'mean', 'zero'} or",
        ),
        ({"width": 200}, r"`width` must be a value in \[0, 100\]"),
        ({"height": 200}, r"`height` must be a value in \[0, 100\]"),
    ]
    for kwargs, msg in bad_calls:
        with pytest.raises(ValueError, match=msg):
            df.style.bar(**kwargs).to_html()


def test_bar_color_and_cmap_error_raises():
    """Check that passing both ``color`` and ``cmap`` raises ValueError."""
    frame = DataFrame({"A": [1, 2, 3, 4]})
    conflicting = {"color": "#d65f5f", "cmap": "viridis"}
    with pytest.raises(ValueError, match="`color` and `cmap` cannot both be given"):
        frame.style.bar(**conflicting).to_html()


def test_bar_invalid_color_type_error_raises():
    """Check that an unsupported ``color`` value raises ValueError.

    Covers a non-string scalar and a list with more than two entries; both
    must produce the same error message.
    """
    df = DataFrame({"A": [1, 2, 3, 4]})
    msg = (
        r"`color` must be string or list or tuple of 2 strings,"
        r"\(eg: color=\['#d65f5f', '#5fba7d'\]\)"
    )
    invalid_colors = [
        123,  # wrong type
        ["#d65f5f", "#5fba7d", "#abcdef"],  # more than two elements
    ]
    for color in invalid_colors:
        with pytest.raises(ValueError, match=msg):
            df.style.bar(color=color).to_html()


def test_styler_bar_with_NA_values():
    df1 = DataFrame({"A": [1, 2, NA, 4]})
    df2 = DataFrame([[NA, NA], [NA, NA]])
    expected_substring = "style type="
    html_output1 = df1.style.bar(subset="A").to_html()
    html_output2 = df2.style.bar(align="left", axis=None).to_html()
    assert expected_substring in html_output1
    assert expected_substring in html_output2


def test_style_bar_with_pyarrow_NA_values():
    """Check that a pyarrow-backed column with a missing value renders to HTML."""
    td.versioned_importorskip("pyarrow")
    data = """name,age,test1,test2,teacher
        Adam,15,95.0,80,Ashby
        Bob,16,81.0,82,Ashby
        Dave,16,89.0,84,Jones
        Fred,15,,88,Jones"""
    buffer = io.StringIO(data)
    frame = read_csv(buffer, dtype_backend="pyarrow")
    # The last row has an empty "test1" field, the column being styled.
    rendered = frame.style.bar(subset="test1").to_html()
    assert "style type=" in rendered