File: test_rules.py

package info (click to toggle)
seaborn 0.12.2-1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 6,148 kB
  • sloc: python: 36,560; makefile: 183; javascript: 45; sh: 15
file content (105 lines) | stat: -rw-r--r-- 3,083 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105

import numpy as np
import pandas as pd

import pytest

from seaborn.external.version import Version
from seaborn._core.rules import (
    VarType,
    variable_type,
    categorical_order,
)


def test_vartype_object():

    v = VarType("numeric")
    assert v == "numeric"
    assert v != "categorical"
    with pytest.raises(AssertionError):
        v == "number"
    with pytest.raises(AssertionError):
        VarType("date")


def test_variable_type():

    s = pd.Series([1., 2., 3.])
    assert variable_type(s) == "numeric"
    assert variable_type(s.astype(int)) == "numeric"
    assert variable_type(s.astype(object)) == "numeric"
    assert variable_type(s.to_numpy()) == "numeric"
    assert variable_type(s.to_list()) == "numeric"

    s = pd.Series([1, 2, 3, np.nan], dtype=object)
    assert variable_type(s) == "numeric"

    s = pd.Series([np.nan, np.nan])
    assert variable_type(s) == "numeric"

    if Version(pd.__version__) >= Version("1.0.0"):
        s = pd.Series([pd.NA, pd.NA])
        assert variable_type(s) == "numeric"

    s = pd.Series(["1", "2", "3"])
    assert variable_type(s) == "categorical"
    assert variable_type(s.to_numpy()) == "categorical"
    assert variable_type(s.to_list()) == "categorical"

    s = pd.Series([True, False, False])
    assert variable_type(s) == "numeric"
    assert variable_type(s, boolean_type="categorical") == "categorical"
    assert variable_type(s, boolean_type="boolean") == "boolean"

    s_cat = s.astype("category")
    assert variable_type(s_cat, boolean_type="categorical") == "categorical"
    assert variable_type(s_cat, boolean_type="numeric") == "categorical"
    assert variable_type(s_cat, boolean_type="boolean") == "categorical"

    s = pd.Series([1, 0, 0])
    assert variable_type(s, boolean_type="boolean") == "boolean"
    assert variable_type(s, boolean_type="boolean", strict_boolean=True) == "numeric"

    s = pd.Series([pd.Timestamp(1), pd.Timestamp(2)])
    assert variable_type(s) == "datetime"
    assert variable_type(s.astype(object)) == "datetime"
    assert variable_type(s.to_numpy()) == "datetime"
    assert variable_type(s.to_list()) == "datetime"


def test_categorical_order():

    x = pd.Series(["a", "c", "c", "b", "a", "d"])
    y = pd.Series([3, 2, 5, 1, 4])
    order = ["a", "b", "c", "d"]

    out = categorical_order(x)
    assert out == ["a", "c", "b", "d"]

    out = categorical_order(x, order)
    assert out == order

    out = categorical_order(x, ["b", "a"])
    assert out == ["b", "a"]

    out = categorical_order(y)
    assert out == [1, 2, 3, 4, 5]

    out = categorical_order(pd.Series(y))
    assert out == [1, 2, 3, 4, 5]

    y_cat = pd.Series(pd.Categorical(y, y))
    out = categorical_order(y_cat)
    assert out == list(y)

    x = pd.Series(x).astype("category")
    out = categorical_order(x)
    assert out == list(x.cat.categories)

    out = categorical_order(x, ["b", "a"])
    assert out == ["b", "a"]

    x = pd.Series(["a", np.nan, "c", "c", "b", "a", "d"])
    out = categorical_order(x)
    assert out == ["a", "c", "b", "d"]