# Tests for the column-name and dtype helpers in bioframe.core.specs.
from io import StringIO
import pandas as pd
import numpy as np
import pytest
from bioframe.core import specs
import bioframe
def test_get_default_colnames():
    """The out-of-the-box default column names are (chrom, start, end)."""
    expected_defaults = ("chrom", "start", "end")
    assert specs._get_default_colnames() == expected_defaults
def test_update_default_colnames():
    """Exercise ``specs.update_default_colnames`` and its validation.

    The test installs a custom triple of default column names, checks that
    ``bioframe.complement`` then works on a frame using those names, and
    checks that two- and four-element name collections are rejected with
    ``ValueError``.

    The defaults are module-level state, so the whole body runs inside
    ``try/finally``: the stock names are restored even when an assertion
    fails, and later tests never see the custom names.
    """
    new_names = ("C", "chromStart", "chromStop")
    try:
        specs.update_default_colnames(new_names)
        assert specs._get_default_colnames() == new_names

        # test that with updated default column names, bioframe.ops recognizes df1
        df1 = pd.DataFrame(
            [["chr1", 1, 5], ["chr1", 3, 8], ["chr1", 8, 10], ["chr1", 12, 14]],
            columns=list(new_names),
        )
        df1_chromsizes = {"chr1": 100, "chrX": 100}
        # Expected result: the gaps around df1's intervals on chr1 up to 100,
        # plus the whole of chrX, each tagged with its view region.
        df1_complement = pd.DataFrame(
            [
                ["chr1", 0, 1, "chr1"],
                ["chr1", 10, 12, "chr1"],
                ["chr1", 14, 100, "chr1"],
                ["chrX", 0, 100, "chrX"],
            ],
            columns=list(new_names) + ["view_region"],
        )
        pd.testing.assert_frame_equal(
            bioframe.complement(df1, view_df=df1_chromsizes), df1_complement
        )

        # cannot update with just two columns
        with pytest.raises(ValueError):
            specs.update_default_colnames(("chromosome", "position"))

        # extra stuff is not allowed
        with pytest.raises(ValueError):
            specs.update_default_colnames(
                ["chromosome", "start", "end", "extrasuff"]
            )
    finally:
        # reset to default, regardless of whether the checks above passed
        specs.update_default_colnames(("chrom", "start", "end"))
def test_verify_columns():
    """Check ``specs._verify_columns`` on a frame with custom column names."""
    custom_cols = ("C", "chromStart", "chromStop")
    intervals = [["chr1", 1, 5], ["chr1", 3, 8], ["chr1", 8, 10], ["chr1", 12, 14]]
    frame = pd.DataFrame(intervals, columns=list(custom_cols))

    # The frame uses custom names, so verifying against the defaults must raise.
    with pytest.raises(ValueError):
        specs._verify_columns(frame, specs._get_default_colnames())

    # With the matching names, the boolean form of the check is truthy.
    assert specs._verify_columns(frame, custom_cols, return_as_bool=True)

    # no repeated column names
    duplicated = ["chromStart", "chromStart"]
    with pytest.raises(ValueError):
        specs._verify_columns(frame, duplicated, unique_cols=True)
def test_verify_column_dtypes():
    """Check ``specs._verify_column_dtypes`` as column dtypes are changed.

    The frame is mutated step by step; each assertion depends on the
    mutations made before it, so the statement order matters.
    """
    custom_cols = ("C", "chromStart", "chromStop")
    intervals = [["chr1", 1, 5], ["chr1", 3, 8], ["chr1", 8, 10], ["chr1", 12, 14]]
    frame = pd.DataFrame(intervals, columns=list(custom_cols))

    # The frame uses custom names, so checking against the defaults must raise.
    with pytest.raises(ValueError):
        specs._verify_column_dtypes(frame, specs._get_default_colnames())

    def dtypes_ok():
        # Re-evaluate the check against the frame's current dtypes.
        return specs._verify_column_dtypes(frame, custom_cols, return_as_bool=True)

    # Freshly built frame: string chromosome column, integer coordinates.
    assert dtypes_ok()

    # A float start column is expected to fail the check.
    frame["chromStart"] = frame["chromStart"].astype(float)
    assert dtypes_ok() is False

    # The pandas nullable integer dtype is expected to pass again.
    frame["chromStart"] = frame["chromStart"].astype(pd.Int64Dtype())
    assert dtypes_ok()

    # An integer chromosome column is expected to fail the check.
    frame["C"] = frame["C"].str.replace("chr", "").astype(np.int64)
    assert dtypes_ok() is False
def test_is_chrom_dtype():
    """``specs.is_chrom_dtype`` accepts str and categorical, rejects int and float."""
    # The builtin string type is accepted.
    assert specs.is_chrom_dtype(str)

    # An ordered categorical dtype is accepted as well.
    fruit_dtype = pd.CategoricalDtype(
        categories=["oranges", "grapefruit", "apples"], ordered=True
    )
    assert specs.is_chrom_dtype(fruit_dtype)

    # Builtin numeric types are rejected.
    assert not specs.is_chrom_dtype(int)
    assert not specs.is_chrom_dtype(float)
|