1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96
|
"""Test handling of values in `obs`/ `var`"""
from __future__ import annotations
import numpy as np
import pandas as pd
import pytest
from natsort import natsorted
import anndata as ad
from anndata.tests.helpers import get_multiindex_columns_df
@pytest.mark.parametrize("dtype", [object, "string"])
def test_str_to_categorical(dtype):
    """Check that a string `obs` column containing a missing value becomes categorical.

    The column is first cast to ``dtype`` (plain ``object`` or pandas
    ``"string"``); after ``strings_to_categoricals`` it must equal the same
    column cast with ``astype("category")``.
    """
    cell_names = [f"cell-{i}" for i in range(5)]
    obs = pd.DataFrame({"str": ["a", "a", None, "b", "b"]}, index=cell_names)
    obs["str"] = obs["str"].astype(dtype)

    # AnnData receives a copy so `obs` stays untouched as the reference.
    adata = ad.AnnData(obs=obs.copy())
    adata.strings_to_categoricals()

    reference = obs["str"].astype("category")
    pd.testing.assert_series_equal(reference, adata.obs["str"])
@pytest.mark.parametrize("dtype", [object, "string"])
def test_to_categorical_ordering(dtype):
    """Check that converted categories follow `natsorted` order.

    The expected column uses a ``CategoricalDtype`` whose categories are the
    unique input values passed through ``natsorted``.
    """
    raw_values = ["10", "11", "3", "9", "10", "10"]
    obs = pd.DataFrame({"str": raw_values}, index=[f"cell-{i}" for i in range(6)])
    obs["str"] = obs["str"].astype(dtype)

    adata = ad.AnnData(obs=obs.copy())
    adata.strings_to_categoricals()

    ordered_categories = natsorted(obs["str"].unique())
    expected = obs["str"].astype(pd.CategoricalDtype(categories=ordered_categories))
    pd.testing.assert_series_equal(expected, adata.obs["str"])
def test_non_str_to_not_categorical():
    """Check that only the string column is converted to categorical.

    The boolean columns mixed with ``NaN``/``None`` must keep both their dtype
    and their contents after ``strings_to_categoricals``.
    """
    # Test case based on https://github.com/scverse/anndata/issues/141#issuecomment-802105259
    string_column = "str_with_nan"
    obs = pd.DataFrame(index=[f"cell-{i}" for i in range(5)]).assign(
        str_with_nan=["foo", "bar", None, np.nan, "foo"],
        boolean_with_nan_and_none=[True, False, np.nan, None, True],
        boolean_with_nan=[True, False, np.nan, np.nan, True],
        boolean_with_none=[True, False, None, None, True],
    )
    adata = ad.AnnData(obs=obs.copy())

    # Every dtype stays as it was, except the string column.
    expected_dtypes = {
        **{column: dtype.name for column, dtype in obs.dtypes.items()},
        string_column: "category",
    }

    adata.strings_to_categoricals()

    result_dtypes = {column: dtype.name for column, dtype in adata.obs.dtypes.items()}
    assert expected_dtypes == result_dtypes

    # The untouched columns must also be value-identical to the input.
    pd.testing.assert_frame_equal(
        obs.drop(columns=[string_column]),
        adata.obs.drop(columns=[string_column]),
    )
def test_error_col_multiindex():
    """Check that assigning a frame with MultiIndex columns to `obs` raises `ValueError`."""
    adata = ad.AnnData(np.random.rand(100, 10))
    n_rows = adata.shape[0]
    multi_col_df = get_multiindex_columns_df((n_rows, 20))

    expected_message = r"MultiIndex columns are not supported"
    with pytest.raises(ValueError, match=expected_message):
        adata.obs = multi_col_df
def test_error_row_multiindex():
    """Check that constructing AnnData from a MultiIndex-indexed frame raises `ValueError`."""
    row_index = pd.MultiIndex.from_tuples([("a", 1), ("b", 2), ("c", 3)])
    df = pd.DataFrame({"x": [1, 2, 3]}, index=row_index)

    expected_message = r"pandas.MultiIndex not supported as index for obs or var"
    with pytest.raises(ValueError, match=expected_message):
        ad.AnnData(df)
def test_error_row_multiindex_setter():
    """Check that assigning a MultiIndex-indexed frame to `obs` raises `ValueError`."""
    row_index = pd.MultiIndex.from_tuples([("a", 1), ("b", 2), ("c", 3)])
    df = pd.DataFrame({"x": [1, 2, 3]}, index=row_index)
    adata = ad.AnnData(np.random.rand(3, 10))

    expected_message = r"pandas.MultiIndex not supported as index for obs or var"
    with pytest.raises(ValueError, match=expected_message):
        adata.obs = df
|