File: test_refseq_gtf.py

package info (click to toggle)
python-gtfparse 1.3.0%2Bds-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 244 kB
  • sloc: python: 583; makefile: 12; sh: 8
file content (16 lines) | stat: -rw-r--r-- 732 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
from gtfparse import read_gtf
from data import data_path

# Location of the "refseq.ucsc.small.gtf" test input, obtained from the
# data_path helper imported from the local `data` module.
# NOTE(review): how data_path resolves the file name is not visible here.
REFSEQ_GTF_PATH = data_path("refseq.ucsc.small.gtf")

def _check_required_columns(gtf_dict):
    """Assert that a parsed RefSeq GTF exposes the expected columns/features.

    `gtf_dict` is any container that supports ``in`` tests on column names
    and ``[...]`` lookup of the "feature" column (a plain dict of lists
    works). An AssertionError naming the first missing column or feature
    type is raised when a check fails; nothing is returned on success.
    """
    # Columns are checked first, in a fixed order, so a missing column is
    # reported before the "feature" column is looked up below.
    for column in ("feature", "gene_id", "transcript_id"):
        assert column in gtf_dict, (
            "Expected column named '%s' in RefSeq GTF" % column)

    # Collapse the feature column to its distinct values for membership tests.
    observed = set(gtf_dict["feature"])
    for feature_type in ("exon", "CDS"):
        assert feature_type in observed, (
            "No %s features in GTF (available: %s)" % (feature_type, observed))

def test_read_refseq_gtf_as_dataframe():
    """Read the RefSeq GTF with read_gtf and run the column/feature checks.

    The object returned by read_gtf is handed directly to
    _check_required_columns, which raises AssertionError on a missing
    column or feature type.
    """
    _check_required_columns(read_gtf(REFSEQ_GTF_PATH))