1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80
|
### This test sometimes breaks following a new Ensembl release.
### Updating it is a little bit of an ad hoc process. First we need to
### understand what has changed at Ensembl about the 6 transcripts used
### in the test. A careful comparison of 'target_txdb' and 'current_txdb'
### should reveal that. The new transcript attributes can also be checked
### on the Ensembl site directly e.g. by looking up the transcript id using
### the search box at http://www.ensembl.org/ (remember to select Human).
### If the content of 'current_txdb' is in accordance with what the Ensembl
### site says, then:
### - adjust any of the TARGET_ variable below that needs adjustment
### - dump 'current_txdb' to GenomicFeatures/inst/extdata/ with
### saveDb(current_txdb, "Biomart_Ensembl_sample.sqlite")
### Then reinstall GenomicFeatures and try to run test_makeTxDbFromBiomart()
### again. Hopefully it'll pass now.
test_makeTxDbFromBiomart <- function()
{
## We test makeTxDbFromBiomart() on the following transcripts:
TARGET_TX_NAME <- c(
## Coding transcripts.
"ENST00000013894", # 7 exons on + strand, CDS'es on exons 1:3
"ENST00000268655", # 3 exons on + strand, CDS'es on all exons
"ENST00000313243", # 14 exons on - strand, CDS'es on exons 2:14
"ENST00000435657", # 30 exons on - strand, CDS'es on exons 2:30
## Non-coding transcripts.
"ENST00000384428", # 1 exon on - strand, no CDS
"ENST00000478783" # 2 exons on + strand, no CDS
)
TARGET_NEXONS_PER_TX <- c(7L, 3L, 14L, 30L, 1L, 2L)
TARGET_TX_STRAND <- c("+", "+", "-", "-", "-", "+")
TARGET_CDS2EXON <- list(1:3, 1:3, 2:14, 2:30)
TARGET_GENE <- c("ENSG00000011198", "ENSG00000103343",
"ENSG00000111837", "ENSG00000231116",
"ENSG00000207157", "ENSG00000067646")
current_txdb <- makeTxDbFromBiomart(transcript_ids=TARGET_TX_NAME,
circ_seqs="MT")
checkTrue(validObject(current_txdb))
## Extract transcripts and re-order them as in TARGET_TX_NAME.
current_tx <- transcripts(current_txdb, columns=c("tx_name", "gene_id"))
checkIdentical(length(TARGET_TX_NAME), length(current_tx))
current_tx_name <- mcols(current_tx)$tx_name
target2current <- match(TARGET_TX_NAME, current_tx_name)
current_tx <- current_tx[target2current]
## Check transcript name.
current_tx_name <- mcols(current_tx)$tx_name
checkIdentical(TARGET_TX_NAME, current_tx_name)
## Check strand.
current_tx_strand <- as.character(strand(current_tx))
checkIdentical(TARGET_TX_STRAND, current_tx_strand)
## Check gene.
current_gene <- as.character(mcols(current_tx)$gene_id)
checkIdentical(TARGET_GENE, current_gene)
## Check nb of exons per transcript.
ex_by_tx <- exonsBy(current_txdb, by="tx", use.names=TRUE)
checkTrue(setequal(TARGET_TX_NAME, names(ex_by_tx)))
nexons_per_tx <- elementNROWS(ex_by_tx)[TARGET_TX_NAME]
checkIdentical(TARGET_NEXONS_PER_TX, as.integer(nexons_per_tx))
## Check CDS'es.
tx_names_with_cds <- head(TARGET_TX_NAME, n=-2)
cds_by_tx <- cdsBy(current_txdb, by="tx", use.names=TRUE)
checkTrue(setequal(tx_names_with_cds, names(cds_by_tx)))
cds_by_tx <- cds_by_tx[tx_names_with_cds]
current_cds2exon <- mcols(unlist(cds_by_tx, use.names=FALSE))$exon_rank
checkIdentical(unlist(TARGET_CDS2EXON), current_cds2exon)
## Compare with a precomputed TxDb object.
target_file <- system.file("extdata",
"Biomart_Ensembl_sample.sqlite",
package="GenomicFeatures")
target_txdb <- loadDb(target_file)
checkTrue(GenomicFeatures:::compareTxDbs(target_txdb, current_txdb))
}
|