File: test_groupby.py

package info (click to toggle)
seaborn 0.12.2-1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 6,148 kB
  • sloc: python: 36,560; makefile: 183; javascript: 45; sh: 15
file content (134 lines) | stat: -rw-r--r-- 4,056 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134

import numpy as np
import pandas as pd

import pytest
from numpy.testing import assert_array_equal

from seaborn._core.groupby import GroupBy


@pytest.fixture
def df():
    """Build the five-row frame shared by the tests in this module.

    Columns "a" and "b" hold string labels; "x" holds integers and "y" floats.
    """
    rows = [
        ["a", "g", 1, .2],
        ["b", "h", 3, .5],
        ["a", "f", 2, .8],
        ["a", "h", 1, .3],
        ["b", "f", 2, .4],
    ]
    return pd.DataFrame(rows, columns=["a", "b", "x", "y"])


def test_init_from_list():
    """A list of variable names gives an ``order`` mapping each name to None."""
    grouper = GroupBy(["a", "c", "b"])
    expected = {name: None for name in ("a", "c", "b")}
    assert grouper.order == expected


def test_init_from_dict():
    """A dict passed at construction is exposed unchanged as ``order``."""
    spec = {"a": [3, 2, 1], "c": None, "b": ["x", "y", "z"]}
    assert GroupBy(spec).order == spec


def test_init_requires_order():
    """Constructing a GroupBy from an empty list raises ValueError."""
    expected_message = "GroupBy requires at least one"
    with pytest.raises(ValueError, match=expected_message):
        GroupBy([])


def test_at_least_one_grouping_variable_required(df):
    """Aggregating raises ValueError when no grouping variable is in the data."""
    # "z" is not a column of the fixture frame.
    grouper = GroupBy(["z"])
    expected_message = "No grouping variables are present"
    with pytest.raises(ValueError, match=expected_message):
        grouper.agg(df, x="mean")


def test_agg_one_grouper(df):
    """Aggregate the max of "y" within each level of "a"."""
    out = GroupBy(["a"]).agg(df, {"y": "max"})

    # Expected column order and contents, keyed by column name.
    expected = {
        "a": ["a", "b"],
        "y": [.8, .5],
    }
    assert_array_equal(out.index, list(range(2)))
    assert_array_equal(out.columns, list(expected))
    for column, values in expected.items():
        assert_array_equal(out[column], values)


def test_agg_two_groupers(df):
    """Aggregate the min of "y" over "a" and "x" jointly.

    The expected result has one row per a/x pairing, with NaN in "y" for the
    pairings that do not occur in the fixture data.
    """
    out = GroupBy(["a", "x"]).agg(df, {"y": "min"})

    # Expected column order and contents, keyed by column name.
    expected = {
        "a": ["a"] * 3 + ["b"] * 3,
        "x": [1, 2, 3] * 2,
        "y": [.2, .8, np.nan, np.nan, .4, .5],
    }
    assert_array_equal(out.index, list(range(6)))
    assert_array_equal(out.columns, list(expected))
    for column, values in expected.items():
        assert_array_equal(out[column], values)


def test_agg_two_groupers_ordered(df):
    """Aggregate with explicit level orders given for "b" and "x".

    The expected rows follow the supplied orders, with "b" varying slowest,
    and pairings absent from the fixture data are missing in "a" and "y".
    """
    levels = {"b": ["h", "g", "f"], "x": [3, 2, 1]}
    out = GroupBy(levels).agg(df, {"a": "min", "y": lambda s: s.iloc[0]})

    assert_array_equal(out.index, list(range(9)))
    assert_array_equal(out.columns, ["a", "b", "x", "y"])
    assert_array_equal(out["b"], ["h"] * 3 + ["g"] * 3 + ["f"] * 3)
    assert_array_equal(out["x"], [3, 2, 1] * 3)

    # Row positions of the four b/x pairings that occur in the fixture:
    # (h, 3), (h, 1), (g, 1) and (f, 2).
    observed = {0, 2, 5, 7}
    a_is_missing = [position not in observed for position in range(9)]
    assert_array_equal(out["a"].isna(), a_is_missing)
    assert_array_equal(out["a"].dropna(), ["b", "a", "a", "a"])
    assert_array_equal(out["y"].dropna(), [.5, .3, .2, .8])


def test_apply_no_grouper(df):
    """Apply a sort when the named grouping column is absent from the data.

    The frame is reduced to "x" and "y", so the "a" grouper is not present;
    the result is expected to match sorting the whole frame by "x".
    """
    df = df[["x", "y"]]

    # "x" contains ties (two 1s, two 2s) and the expected "y" order depends on
    # how they are broken. pandas' default quicksort does not guarantee tie
    # order, so request a stable sort on both the applied and expected sides.
    res = GroupBy(["a"]).apply(
        df, lambda x: x.sort_values("x", kind="mergesort")
    )
    assert_array_equal(res.columns, ["x", "y"])
    assert_array_equal(res["x"], df["x"].sort_values(kind="mergesort"))
    assert_array_equal(
        res["y"], df.loc[np.argsort(df["x"], kind="mergesort"), "y"]
    )


def test_apply_one_grouper(df):
    """Apply a within-group sort by "x" when grouping on "a"."""
    # Group "a" has two rows with x == 1, and the expected "b" order below
    # assumes they keep their original relative order. pandas' default
    # quicksort does not promise that, so request a stable sort explicitly.
    res = GroupBy(["a"]).apply(
        df, lambda x: x.sort_values("x", kind="mergesort")
    )
    assert_array_equal(res.index, [0, 1, 2, 3, 4])
    assert_array_equal(res.columns, ["a", "b", "x", "y"])
    assert_array_equal(res["a"], ["a", "a", "a", "b", "b"])
    assert_array_equal(res["b"], ["g", "h", "f", "f", "h"])
    assert_array_equal(res["x"], [1, 1, 2, 2, 3])


def test_apply_mutate_columns(df):
    """Apply a function whose output has different rows than its input.

    Each group is replaced by a degree-1 polynomial fit evaluated on a fixed
    grid, so every group contributes ``grid.size`` rows to the result.
    """
    grid = np.arange(5)
    predictions = []  # One array per call of the applied function.

    def fit_line(group):
        coefs = np.polyfit(group["x"], group["y"], 1)
        yhat = np.polyval(coefs, grid)
        predictions.append(yhat)
        return pd.DataFrame({"x": grid, "y": yhat})

    out = GroupBy(["a"]).apply(df, fit_line)

    n_groups = 2
    expected_a = [level for level in ("a", "b") for _ in range(grid.size)]
    assert_array_equal(out.index, np.arange(grid.size * n_groups))
    assert_array_equal(out.columns, ["a", "x", "y"])
    assert_array_equal(out["a"], expected_a)
    assert_array_equal(out["x"], np.concatenate([grid] * n_groups))
    assert_array_equal(out["y"], np.concatenate(predictions))


def test_apply_replace_columns(df):
    """Apply a function that returns a column not present in the input.

    The applied function returns "x" sorted within the group and a new "z"
    column holding the cumulative sum of "y" in that sorted order; the
    result is expected to carry "a", "x" and "z".
    """

    def add_sorted_cumsum(df):

        # Group "a" has two rows with x == 1, and the cumulative sums expected
        # below depend on which comes first. The default quicksort does not
        # guarantee tie order, so request a stable sort explicitly.
        x = df["x"].sort_values(kind="mergesort")
        z = df.loc[x.index, "y"].cumsum()
        return pd.DataFrame(dict(x=x.values, z=z.values))

    res = GroupBy(["a"]).apply(df, add_sorted_cumsum)
    assert_array_equal(res.index, df.index)
    assert_array_equal(res.columns, ["a", "x", "z"])
    assert_array_equal(res["a"], ["a", "a", "a", "b", "b"])
    assert_array_equal(res["x"], [1, 1, 2, 2, 3])
    assert_array_equal(res["z"], [.2, .5, 1.3, .4, .9])