1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136
|
import numpy as np
import pandas as pd
import pytest
from pandas.testing import assert_frame_equal
from seaborn._core.groupby import GroupBy
from seaborn._stats.aggregation import Agg, Est
class AggregationFixtures:
    """Shared fixture and helper used by the aggregation stat test classes."""

    @pytest.fixture
    def df(self, rng):
        """Return a 30-row frame with numeric x/y and categorical color/group."""
        n_rows = 30
        # Draw order (x, y, color, group) is kept fixed so that the random
        # stream produces the same data for every column.
        columns = {
            "x": rng.uniform(0, 7, n_rows).round(),
            "y": rng.normal(size=n_rows),
            "color": rng.choice(["a", "b", "c"], n_rows),
            "group": rng.choice(["x", "y"], n_rows),
        }
        return pd.DataFrame(columns)

    def get_groupby(self, df, orient):
        """Build a GroupBy over every column of ``df`` except the non-orient axis.

        ``orient`` must be ``"x"`` or ``"y"``; the opposite coordinate variable
        is left out of the grouping columns.
        """
        opposite_axis = {"x": "y", "y": "x"}
        value_var = opposite_axis[orient]
        grouping_vars = [name for name in df if name != value_var]
        return GroupBy(grouping_vars)
class TestAgg(AggregationFixtures):
    """Compare ``Agg`` results against equivalent pandas groupby operations."""

    def test_default(self, df):
        """Agg() with no arguments matches the per-x mean of y."""
        orient = "x"
        data = df[["x", "y"]]
        groupby = self.get_groupby(data, orient)

        result = Agg()(data, groupby, orient, {})

        expected = data.groupby("x", as_index=False)["y"].mean()
        assert_frame_equal(result, expected)

    def test_default_multi(self, df):
        """Agg() with several grouping variables matches a multi-key mean."""
        orient = "x"
        groupby = self.get_groupby(df, orient)
        result = Agg()(df, groupby, orient, {})

        keys = ["x", "color", "group"]
        # Sorted x levels crossed with color/group levels in order of
        # appearance define the row order the expected frame is aligned to.
        levels = [
            sorted(df["x"].unique()),
            df["color"].unique(),
            df["group"].unique(),
        ]
        full_index = pd.MultiIndex.from_product(levels, names=keys)

        group_means = df.groupby(keys).agg("mean")
        observed_means = group_means.reindex(index=full_index).dropna()
        expected = observed_means.reset_index().reindex(columns=df.columns)
        assert_frame_equal(result, expected)

    @pytest.mark.parametrize("func", ["max", lambda x: float(len(x) % 2)])
    def test_func(self, df, func):
        """Agg(func) matches pandas ``agg(func)`` for a named or callable func."""
        orient = "x"
        data = df[["x", "y"]]
        groupby = self.get_groupby(data, orient)

        result = Agg(func)(data, groupby, orient, {})

        expected = data.groupby("x", as_index=False)["y"].agg(func)
        assert_frame_equal(result, expected)
class TestEst(AggregationFixtures):
    """Check ``Est`` estimates and error intervals against pandas/numpy."""

    # Note: Most of the underlying code is exercised in tests/test_statistics

    @pytest.mark.parametrize("func", [np.mean, "mean"])
    def test_mean_sd(self, df, func):
        """Est(func, "sd") returns the group mean with mean -/+ std as ymin/ymax."""
        ori = "x"
        df = df[["x", "y"]]
        gb = self.get_groupby(df, ori)
        res = Est(func, "sd")(df, gb, ori, {})

        grouped = df.groupby("x", as_index=False)["y"]
        est = grouped.mean()
        err = grouped.std().fillna(0)  # fillna needed only on pinned tests
        expected = est.assign(ymin=est["y"] - err["y"], ymax=est["y"] + err["y"])
        assert_frame_equal(res, expected)

    def test_sd_single_obs(self):
        """With a single observation, ymin and ymax both equal the value."""
        y = 1.5
        ori = "x"
        df = pd.DataFrame([{"x": "a", "y": y}])
        gb = self.get_groupby(df, ori)
        res = Est("mean", "sd")(df, gb, ori, {})
        expected = df.assign(ymin=y, ymax=y)
        assert_frame_equal(res, expected)

    def test_median_pi(self, df):
        """Est("median", ("pi", 100)) spans each group's min to max."""
        ori = "x"
        df = df[["x", "y"]]
        gb = self.get_groupby(df, ori)
        res = Est("median", ("pi", 100))(df, gb, ori, {})

        grouped = df.groupby("x", as_index=False)["y"]
        est = grouped.median()
        expected = est.assign(ymin=grouped.min()["y"], ymax=grouped.max()["y"])
        assert_frame_equal(res, expected)

    def test_weighted_mean(self, df, rng):
        """Est("mean") on data with a ``weight`` column matches np.average."""
        weights = rng.uniform(0, 5, len(df))
        # Group on x only; the weight column is added after the GroupBy is
        # built so it is not treated as a grouping variable.
        gb = self.get_groupby(df[["x", "y"]], "x")
        df = df.assign(weight=weights)
        res = Est("mean")(df, gb, "x", {})
        for _, res_row in res.iterrows():
            rows = df[df["x"] == res_row["x"]]
            expected = np.average(rows["y"], weights=rows["weight"])
            # Compare with a tolerance: exact float equality would tie the
            # test to the implementation's summation order.
            assert res_row["y"] == pytest.approx(expected)

    def test_seed(self, df):
        """Two Est instances built with the same seed give identical results."""
        ori = "x"
        gb = self.get_groupby(df, ori)
        args = df, gb, ori, {}
        res1 = Est("mean", "ci", seed=99)(*args)
        res2 = Est("mean", "ci", seed=99)(*args)
        assert_frame_equal(res1, res2)
|