1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54
|
from gtfparse import read_gtf
from data import data_path
B16_GTF_PATH = data_path("B16.stringtie.head.gtf")
def _check_required_columns(gtf_dict):
assert "feature" in gtf_dict, "Expected column named 'feature' in StringTie GTF"
assert "cov" in gtf_dict, "Expected column named 'cov' in StringTie GTF"
assert "FPKM" in gtf_dict, "Expected column named 'FPKM' in StringTie GTF"
features = set(gtf_dict["feature"])
assert "exon" in features, "No exons in GTF (available: %s)" % features
assert "transcript" in features, "No transcripts in GTF (available: %s)" % features
def _check_string_cov_and_FPKM(gtf_dict):
for i, feature_name in enumerate(gtf_dict["feature"]):
cov = gtf_dict["cov"][i]
fpkm = gtf_dict["FPKM"][i]
if feature_name == "exon":
assert len(fpkm) == 0, \
"Expected missing FPKM for exon, got %s" % (fpkm,)
assert len(cov) > 0 and float(cov) >= 0, \
"Expected non-negative cov for exon, got %s" % (cov,)
elif feature_name == "transcript":
assert len(cov) and float(cov) >= 0, \
"Expected non-negative cov for transcript, got %s" % (cov,)
assert len(fpkm) > 0 and float(fpkm) >= 0, \
"Expected non-negative FPKM for transcript, got %s" % (fpkm,)
def _check_float_cov_and_FPKM(gtf_dict):
for i, feature_name in enumerate(gtf_dict["feature"]):
cov = gtf_dict["cov"][i]
fpkm = gtf_dict["FPKM"][i]
assert isinstance(cov, float), \
"Expected cov to be float but got %s : %s" % (cov, type(cov))
if feature_name == "exon":
assert cov >= 0, "Expected non-negative cov for exon, got %s" % (cov,)
elif feature_name == "transcript":
assert isinstance(fpkm, float), \
"Expected FPKM to be float but got %s : %s" % (fpkm, type(fpkm))
assert cov >= 0, "Expected non-negative cov for transcript, got %s" % (cov,)
assert fpkm >= 0, "Expected non-negative FPKM for transcript, got %s" % (fpkm,)
def test_read_stringtie_gtf_as_dataframe():
gtf_df = read_gtf(B16_GTF_PATH)
_check_required_columns(gtf_df)
_check_string_cov_and_FPKM(gtf_df)
def test_read_stringtie_gtf_as_dataframe_float_values():
gtf_df = read_gtf(
B16_GTF_PATH,
column_converters={"cov": float, "FPKM": float})
_check_required_columns(gtf_df)
_check_float_cov_and_FPKM(gtf_df)
|