File: test_annot.py

package info (click to toggle)
python-anndata 0.12.6-1
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 2,876 kB
  • sloc: python: 21,429; makefile: 23
file content (96 lines) | stat: -rw-r--r-- 3,101 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
"""Test handling of values in `obs`/ `var`"""

from __future__ import annotations

import numpy as np
import pandas as pd
import pytest
from natsort import natsorted

import anndata as ad
from anndata.tests.helpers import get_multiindex_columns_df


@pytest.mark.parametrize("dtype", [object, "string"])
def test_str_to_categorical(dtype):
    """Check `strings_to_categoricals` on a string `obs` column with a missing value.

    The column is cast to ``dtype`` before the `AnnData` object is built; after
    conversion it must equal the same column cast with ``astype("category")``.
    """
    cell_names = [f"cell-{i}" for i in range(5)]
    frame = pd.DataFrame({"str": ["a", "a", None, "b", "b"]}, index=cell_names)
    frame["str"] = frame["str"].astype(dtype)

    # AnnData receives a copy so `frame` remains the unmodified reference.
    adata = ad.AnnData(obs=frame.copy())
    adata.strings_to_categoricals()

    pd.testing.assert_series_equal(
        frame["str"].astype("category"), adata.obs["str"]
    )


@pytest.mark.parametrize("dtype", [object, "string"])
def test_to_categorical_ordering(dtype):
    """Check the category order produced by `strings_to_categoricals`.

    The expected categories are the unique column values ordered by
    `natsorted`, for each parametrized input ``dtype``.
    """
    cell_names = [f"cell-{i}" for i in range(6)]
    frame = pd.DataFrame(
        {"str": ["10", "11", "3", "9", "10", "10"]},
        index=cell_names,
    )
    frame["str"] = frame["str"].astype(dtype)

    # AnnData receives a copy so `frame` remains the unmodified reference.
    adata = ad.AnnData(obs=frame.copy())
    adata.strings_to_categoricals()

    ordered_categories = natsorted(frame["str"].unique())
    expected_dtype = pd.CategoricalDtype(categories=ordered_categories)
    pd.testing.assert_series_equal(
        frame["str"].astype(expected_dtype), adata.obs["str"]
    )


def test_non_str_to_not_categorical():
    """Check that only the string column changes dtype in `strings_to_categoricals`.

    The boolean-like columns (mixing ``True``/``False`` with ``np.nan`` and/or
    ``None``) must keep both their dtype and their values.
    """
    # Test case based on https://github.com/scverse/anndata/issues/141#issuecomment-802105259
    columns = {
        "str_with_nan": ["foo", "bar", None, np.nan, "foo"],
        "boolean_with_nan_and_none": [True, False, np.nan, None, True],
        "boolean_with_nan": [True, False, np.nan, np.nan, True],
        "boolean_with_none": [True, False, None, None, True],
    }
    cell_names = [f"cell-{i}" for i in range(5)]
    obs = pd.DataFrame(index=cell_names).assign(**columns)
    adata = ad.AnnData(obs=obs.copy())

    def dtype_names(df):
        # Map each column name to the name of its dtype.
        return {col: dt.name for col, dt in df.dtypes.items()}

    # Same dtypes as the input, except the string column becomes categorical.
    expected_dtypes = {**dtype_names(obs), "str_with_nan": "category"}

    adata.strings_to_categoricals()

    assert expected_dtypes == dtype_names(adata.obs)

    # Every column other than the converted one must be left exactly as it was.
    untouched_expected = obs.drop(columns=["str_with_nan"])
    untouched_result = adata.obs.drop(columns=["str_with_nan"])
    pd.testing.assert_frame_equal(untouched_expected, untouched_result)


def test_error_col_multiindex():
    """Assigning a frame with MultiIndex columns to `obs` must raise `ValueError`."""
    x_shape = (100, 10)
    adata = ad.AnnData(np.random.rand(*x_shape))
    # Row count is taken from `adata` so the frame lines up with its first axis.
    multi_col_df = get_multiindex_columns_df((adata.shape[0], 20))
    with pytest.raises(ValueError, match=r"MultiIndex columns are not supported"):
        adata.obs = multi_col_df


def test_error_row_multiindex():
    """Constructing `AnnData` from a frame with a MultiIndex row index must raise.

    Expects a `ValueError` whose message contains the literal text
    ``pandas.MultiIndex not supported as index for obs or var``.
    """
    df = pd.DataFrame(
        {"x": [1, 2, 3]},
        index=pd.MultiIndex.from_tuples([("a", 1), ("b", 2), ("c", 3)]),
    )
    # `match` is treated as a regular expression, so the dot is escaped to
    # require a literal "." rather than any character.
    with pytest.raises(
        ValueError, match=r"pandas\.MultiIndex not supported as index for obs or var"
    ):
        ad.AnnData(df)


def test_error_row_multiindex_setter():
    """Assigning a frame with a MultiIndex row index to `obs` must raise.

    Expects a `ValueError` whose message contains the literal text
    ``pandas.MultiIndex not supported as index for obs or var``.
    """
    df = pd.DataFrame(
        {"x": [1, 2, 3]},
        index=pd.MultiIndex.from_tuples([("a", 1), ("b", 2), ("c", 3)]),
    )
    # Three rows in X to line up with the three rows of `df`.
    adata = ad.AnnData(np.random.rand(3, 10))
    # `match` is treated as a regular expression, so the dot is escaped to
    # require a literal "." rather than any character.
    with pytest.raises(
        ValueError, match=r"pandas\.MultiIndex not supported as index for obs or var"
    ):
        adata.obs = df