File: test_makeTxDbFromBiomart.R

package info (click to toggle)
r-bioc-txdbmaker 1.2.1%2Bds-2
links: PTS, VCS
area: main
in suites: sid, trixie
size: 3,168 kB
sloc: makefile: 2
file content (80 lines) | stat: -rw-r--r-- 3,789 bytes
parent folder | download | duplicates (2)
### This test sometimes breaks following a new Ensembl release.
### Updating it is a little bit of an ad hoc process. First we need to
### understand what has changed at Ensembl about the 6 transcripts used
### in the test. A careful comparison of 'target_txdb' and 'current_txdb'
### should reveal that. The new transcript attributes can also be checked
### on the Ensembl site directly e.g. by looking up the transcript id using
### the search box at http://www.ensembl.org/ (remember to select Human).
### If the content of 'current_txdb' is in accordance with what the Ensembl
### site says, then:
### - adjust any of the TARGET_ variable below that needs adjustment
### - dump 'current_txdb' to GenomicFeatures/inst/extdata/ with
###   saveDb(current_txdb, "Biomart_Ensembl_sample.sqlite")
### Then reinstall GenomicFeatures and try to run test_makeTxDbFromBiomart()
### again. Hopefully it'll pass now.

test_makeTxDbFromBiomart <- function()
{
    ## We test makeTxDbFromBiomart() on the following transcripts:
    TARGET_TX_NAME <- c(
        ## Coding transcripts.
        "ENST00000013894",  #  7 exons on + strand, CDS'es on exons 1:3
        "ENST00000268655",  #  3 exons on + strand, CDS'es on all exons
        "ENST00000313243",  # 14 exons on - strand, CDS'es on exons 2:14
        "ENST00000435657",  # 30 exons on - strand, CDS'es on exons 2:30
        ## Non-coding transcripts.
        "ENST00000384428",  #  1 exon  on - strand, no CDS
        "ENST00000478783"   #  2 exons on + strand, no CDS
    )
    TARGET_NEXONS_PER_TX <- c(7L, 3L, 14L, 30L, 1L, 2L)
    TARGET_TX_STRAND <- c("+", "+", "-", "-", "-", "+")
    TARGET_CDS2EXON <- list(1:3, 1:3, 2:14, 2:30)
    TARGET_GENE <- c("ENSG00000011198", "ENSG00000103343",
                     "ENSG00000111837", "ENSG00000231116",
                     "ENSG00000207157", "ENSG00000067646")

    current_txdb <- makeTxDbFromBiomart(transcript_ids=TARGET_TX_NAME,
                                        circ_seqs="MT")
    checkTrue(validObject(current_txdb))

    ## Extract transcripts and re-order them as in TARGET_TX_NAME.
    current_tx <- transcripts(current_txdb, columns=c("tx_name", "gene_id"))
    checkIdentical(length(TARGET_TX_NAME), length(current_tx))
    current_tx_name <- mcols(current_tx)$tx_name
    target2current <- match(TARGET_TX_NAME, current_tx_name)
    current_tx <- current_tx[target2current]

    ## Check transcript name.
    current_tx_name <- mcols(current_tx)$tx_name
    checkIdentical(TARGET_TX_NAME, current_tx_name)

    ## Check strand.
    current_tx_strand <- as.character(strand(current_tx))
    checkIdentical(TARGET_TX_STRAND, current_tx_strand)

    ## Check gene.
    current_gene <- as.character(mcols(current_tx)$gene_id)
    checkIdentical(TARGET_GENE, current_gene)
 
    ## Check nb of exons per transcript.
    ex_by_tx <- exonsBy(current_txdb, by="tx", use.names=TRUE)
    checkTrue(setequal(TARGET_TX_NAME, names(ex_by_tx)))
    nexons_per_tx <- elementNROWS(ex_by_tx)[TARGET_TX_NAME]
    checkIdentical(TARGET_NEXONS_PER_TX, as.integer(nexons_per_tx))

    ## Check CDS'es.
    tx_names_with_cds <- head(TARGET_TX_NAME, n=-2)
    cds_by_tx <- cdsBy(current_txdb, by="tx", use.names=TRUE)
    checkTrue(setequal(tx_names_with_cds, names(cds_by_tx)))
    cds_by_tx <- cds_by_tx[tx_names_with_cds]
    current_cds2exon <- mcols(unlist(cds_by_tx, use.names=FALSE))$exon_rank
    checkIdentical(unlist(TARGET_CDS2EXON), current_cds2exon)
  
    ## Compare with a precomputed TxDb object.
    target_file <- system.file("extdata",
                               "Biomart_Ensembl_sample.sqlite",
                               package="GenomicFeatures")
    target_txdb <- loadDb(target_file)
    checkTrue(GenomicFeatures:::compareTxDbs(target_txdb, current_txdb))
}