# Tests for seaborn._core.rules: VarType, variable_type, categorical_order.
import numpy as np
import pandas as pd
import pytest
from seaborn.external.version import Version
from seaborn._core.rules import (
VarType,
variable_type,
categorical_order,
)
def test_vartype_object():
    """Check VarType equality against names and its rejection of bad names."""
    numeric = VarType("numeric")

    # Comparisons against these two names are expected to work like strings.
    assert numeric == "numeric"
    assert numeric != "categorical"

    # The test expects a comparison against "number" to raise rather than
    # quietly evaluate to False.
    with pytest.raises(AssertionError):
        numeric == "number"

    # Likewise, building a VarType from "date" is expected to raise.
    with pytest.raises(AssertionError):
        VarType("date")
def test_variable_type():
    """Check the type that variable_type reports for a range of inputs."""
    # Plain numbers, in several dtypes and container types.
    floats = pd.Series([1., 2., 3.])
    numeric_inputs = (
        floats,
        floats.astype(int),
        floats.astype(object),
        floats.to_numpy(),
        floats.to_list(),
    )
    for vector in numeric_inputs:
        assert variable_type(vector) == "numeric"

    # Numbers mixed with a missing value in an object-dtype series.
    with_missing = pd.Series([1, 2, 3, np.nan], dtype=object)
    assert variable_type(with_missing) == "numeric"

    # Entirely-missing data is expected to be reported as numeric.
    all_nan = pd.Series([np.nan, np.nan])
    assert variable_type(all_nan) == "numeric"

    # pd.NA is only exercised on pandas 1.0.0 and later.
    if Version(pd.__version__) >= Version("1.0.0"):
        all_na = pd.Series([pd.NA, pd.NA])
        assert variable_type(all_na) == "numeric"

    # Numeric-looking strings are still expected to be categorical.
    digits = pd.Series(["1", "2", "3"])
    for vector in (digits, digits.to_numpy(), digits.to_list()):
        assert variable_type(vector) == "categorical"

    # Boolean data: numeric by default, otherwise whatever boolean_type names.
    flags = pd.Series([True, False, False])
    assert variable_type(flags) == "numeric"
    for requested in ("categorical", "boolean"):
        assert variable_type(flags, boolean_type=requested) == requested

    # A categorical dtype is expected to win regardless of boolean_type.
    flags_cat = flags.astype("category")
    for requested in ("categorical", "numeric", "boolean"):
        assert variable_type(flags_cat, boolean_type=requested) == "categorical"

    # Integer 0/1 data is expected to count as boolean unless strict_boolean
    # is set.
    zero_one = pd.Series([1, 0, 0])
    assert variable_type(zero_one, boolean_type="boolean") == "boolean"
    strict_result = variable_type(
        zero_one, boolean_type="boolean", strict_boolean=True
    )
    assert strict_result == "numeric"

    # Timestamps, in several dtypes and container types.
    stamps = pd.Series([pd.Timestamp(1), pd.Timestamp(2)])
    datetime_inputs = (
        stamps,
        stamps.astype(object),
        stamps.to_numpy(),
        stamps.to_list(),
    )
    for vector in datetime_inputs:
        assert variable_type(vector) == "datetime"
def test_categorical_order():
    """Check the level ordering that categorical_order returns."""
    letters = pd.Series(["a", "c", "c", "b", "a", "d"])
    numbers = pd.Series([3, 2, 5, 1, 4])
    alphabetical = ["a", "b", "c", "d"]

    # With no explicit order, strings are expected in order of appearance.
    assert categorical_order(letters) == ["a", "c", "b", "d"]

    # An explicit order is expected back unchanged, even if it is a subset.
    assert categorical_order(letters, alphabetical) == alphabetical
    assert categorical_order(letters, ["b", "a"]) == ["b", "a"]

    # Numeric data is expected back sorted.
    assert categorical_order(numbers) == [1, 2, 3, 4, 5]
    assert categorical_order(pd.Series(numbers)) == [1, 2, 3, 4, 5]

    # Categorical data is expected to keep its declared category order.
    numbers_cat = pd.Series(pd.Categorical(numbers, numbers))
    assert categorical_order(numbers_cat) == list(numbers)

    letters_cat = pd.Series(letters).astype("category")
    assert categorical_order(letters_cat) == list(letters_cat.cat.categories)

    # An explicit order is also expected to be honoured for categorical data.
    assert categorical_order(letters_cat, ["b", "a"]) == ["b", "a"]

    # Missing values are expected to be left out of the result.
    letters_with_nan = pd.Series(["a", np.nan, "c", "c", "b", "a", "d"])
    assert categorical_order(letters_with_nan) == ["a", "c", "b", "d"]
|