"""
Tests for methods which return collections of transcript IDs directly,
rather than by converting from some other type of name or ID.
"""
from __future__ import absolute_import
from pyensembl import genome_for_reference_name
from .common import eq_
from .common import run_multiple_genomes
# GRCh38 genome object shared by the tests in this module.
# NOTE(review): presumably this resolves to the most recent Ensembl release
# for the GRCh38 assembly — confirm against pyensembl.
grch38 = genome_for_reference_name("GRCh38")
# A subset (not the complete set) of the Ensembl transcript IDs for HLA-A.
# Kept as a list because other module-level constants concatenate it
# with further lists.
HLA_A_TRANSCRIPT_IDS = [
    "ENST00000396634",
    "ENST00000376809",
    "ENST00000376806",
    "ENST00000376802",
    "ENST00000496081",
    "ENST00000495183",
    "ENST00000461903",
    "ENST00000479320",
]
def test_transcript_ids_ensembl_grch38_hla_a():
    """
    Check that every ID in HLA_A_TRANSCRIPT_IDS is among the transcript IDs
    which GRCh38 reports at the HLA-A locus.
    """
    # The locus 6:29941260-29945884 comes from the Ensembl gene summary for
    # HLA-A (chr6:29,945,884 is a position for HLA-A):
    # http://useast.ensembl.org/Homo_sapiens/Gene/
    # Summary?db=core;g=ENSG00000206503;r=6:29941260-29945884
    contig, start, end = 6, 29941260, 29945884
    ids_at_locus = grch38.transcript_ids_at_locus(contig, start, end)
    for expected_id in HLA_A_TRANSCRIPT_IDS:
        # Fail on the first expected ID which the locus query did not return.
        assert expected_id in ids_at_locus, (
            "Transcript %s of HLA-A not found overlapping locus" % expected_id
        )
# Transcript IDs expected to be present in a genome's full list of
# transcript IDs: the HLA-A subset plus one transcript from each of three
# other genes. Built from a copy so the HLA-A list itself is left unmodified.
KNOWN_TRANSCRIPT_IDS = list(HLA_A_TRANSCRIPT_IDS)
KNOWN_TRANSCRIPT_IDS.extend(
    [
        "ENST00000398417",  # transcript ID of SMAD4-001
        "ENST00000334701",  # transcript ID of HSP90AA1-001
        "ENST00000599837",  # transcript ID of CTAG1A-002
    ]
)
# TODO: add release 54 after transcript IDs for older GTFs are filled in
# See https://github.com/hammerlab/pyensembl/issues/20
@run_multiple_genomes(75, grch38.release)
def test_all_transcript_ids(genome):
    """
    Check that every ID in KNOWN_TRANSCRIPT_IDS is contained in the
    collection returned by ``genome.transcript_ids()``.

    NOTE(review): the decorator presumably calls this test once for each of
    the listed releases (75 and the GRCh38 release) — confirm in .common.
    """
    # Collect the IDs into a set once so each membership check is cheap.
    available_ids = set(genome.transcript_ids())
    for known_id in KNOWN_TRANSCRIPT_IDS:
        assert known_id in available_ids, "Missing transcript ID %s from %s" % (
            known_id,
            genome,
        )
def test_transcript_id_of_protein_id_CCR2():
    """
    Check that the CCR2 protein ID ENSP00000399285 maps to the transcript
    ID ENST00000445132 on GRCh38.
    """
    # Looked up on Oct 9 2021:
    # CCR2-203 ENST00000445132.3 maps to ENSP00000399285.2
    # Ensembl release 104, GRCh38.p13
    protein_id = "ENSP00000399285"
    expected_transcript_id = "ENST00000445132"
    eq_(expected_transcript_id, grch38.transcript_id_of_protein_id(protein_id))
|