File: test_aggregation.py

package info (click to toggle)
seaborn 0.13.2-6
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 6,920 kB
  • sloc: python: 37,249; makefile: 182; javascript: 45; sh: 15
file content (136 lines) | stat: -rw-r--r-- 3,914 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136

import numpy as np
import pandas as pd

import pytest
from pandas.testing import assert_frame_equal

from seaborn._core.groupby import GroupBy
from seaborn._stats.aggregation import Agg, Est


class AggregationFixtures:

    @pytest.fixture
    def df(self, rng):

        n = 30
        return pd.DataFrame(dict(
            x=rng.uniform(0, 7, n).round(),
            y=rng.normal(size=n),
            color=rng.choice(["a", "b", "c"], n),
            group=rng.choice(["x", "y"], n),
        ))

    def get_groupby(self, df, orient):

        other = {"x": "y", "y": "x"}[orient]
        cols = [c for c in df if c != other]
        return GroupBy(cols)


class TestAgg(AggregationFixtures):

    def test_default(self, df):

        ori = "x"
        df = df[["x", "y"]]
        gb = self.get_groupby(df, ori)
        res = Agg()(df, gb, ori, {})

        expected = df.groupby("x", as_index=False)["y"].mean()
        assert_frame_equal(res, expected)

    def test_default_multi(self, df):

        ori = "x"
        gb = self.get_groupby(df, ori)
        res = Agg()(df, gb, ori, {})

        grp = ["x", "color", "group"]
        index = pd.MultiIndex.from_product(
            [sorted(df["x"].unique()), df["color"].unique(), df["group"].unique()],
            names=["x", "color", "group"]
        )
        expected = (
            df
            .groupby(grp)
            .agg("mean")
            .reindex(index=index)
            .dropna()
            .reset_index()
            .reindex(columns=df.columns)
        )
        assert_frame_equal(res, expected)

    @pytest.mark.parametrize("func", ["max", lambda x: float(len(x) % 2)])
    def test_func(self, df, func):

        ori = "x"
        df = df[["x", "y"]]
        gb = self.get_groupby(df, ori)
        res = Agg(func)(df, gb, ori, {})

        expected = df.groupby("x", as_index=False)["y"].agg(func)
        assert_frame_equal(res, expected)


class TestEst(AggregationFixtures):

    # Note: Most of the underlying code is exercised in tests/test_statistics

    @pytest.mark.parametrize("func", [np.mean, "mean"])
    def test_mean_sd(self, df, func):

        ori = "x"
        df = df[["x", "y"]]
        gb = self.get_groupby(df, ori)
        res = Est(func, "sd")(df, gb, ori, {})

        grouped = df.groupby("x", as_index=False)["y"]
        est = grouped.mean()
        err = grouped.std().fillna(0)  # fillna needed only on pinned tests
        expected = est.assign(ymin=est["y"] - err["y"], ymax=est["y"] + err["y"])
        assert_frame_equal(res, expected)

    def test_sd_single_obs(self):

        y = 1.5
        ori = "x"
        df = pd.DataFrame([{"x": "a", "y": y}])
        gb = self.get_groupby(df, ori)
        res = Est("mean", "sd")(df, gb, ori, {})
        expected = df.assign(ymin=y, ymax=y)
        assert_frame_equal(res, expected)

    def test_median_pi(self, df):

        ori = "x"
        df = df[["x", "y"]]
        gb = self.get_groupby(df, ori)
        res = Est("median", ("pi", 100))(df, gb, ori, {})

        grouped = df.groupby("x", as_index=False)["y"]
        est = grouped.median()
        expected = est.assign(ymin=grouped.min()["y"], ymax=grouped.max()["y"])
        assert_frame_equal(res, expected)

    def test_weighted_mean(self, df, rng):

        weights = rng.uniform(0, 5, len(df))
        gb = self.get_groupby(df[["x", "y"]], "x")
        df = df.assign(weight=weights)
        res = Est("mean")(df, gb, "x", {})
        for _, res_row in res.iterrows():
            rows = df[df["x"] == res_row["x"]]
            expected = np.average(rows["y"], weights=rows["weight"])
            assert res_row["y"] == expected

    def test_seed(self, df):

        ori = "x"
        gb = self.get_groupby(df, ori)
        args = df, gb, ori, {}
        res1 = Est("mean", "ci", seed=99)(*args)
        res2 = Est("mean", "ci", seed=99)(*args)
        assert_frame_equal(res1, res2)