1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212
|
context("tximeta")
test_that("tximeta works as expected", {
testthat::skip("The json file contains an invalid character, skipping for now")
dir <- system.file("extdata/salmon_dm", package="tximportData")
files <- file.path(dir, "SRR1197474", "quant.sf")
coldata <- data.frame(files, names="SRR1197474", condition="A", stringsAsFactors=FALSE)
indexDir <- file.path(dir, "Dm.BDGP6.22.98_salmon-0.14.1")
fastaFTP <- c("ftp://ftp.ensembl.org/pub/release-98/fasta/drosophila_melanogaster/cdna/Drosophila_melanogaster.BDGP6.22.cdna.all.fa.gz",
"ftp://ftp.ensembl.org/pub/release-98/fasta/drosophila_melanogaster/ncrna/Drosophila_melanogaster.BDGP6.22.ncrna.fa.gz")
gtfPath <- file.path(dir,"Drosophila_melanogaster.BDGP6.22.98.gtf.gz")
makeLinkedTxome(indexDir=indexDir, source="LocalEnsembl", organism="Drosophila melanogaster",
release="98", genome="BDGP6.22", fasta=fastaFTP, gtf=gtfPath, write=FALSE)
# TODO why not throwing warnings on Bioc
#expect_warning({se <- tximeta(coldata)}, "annotation is missing")
se <- tximeta(coldata)
# check adding IDs
library(org.Dm.eg.db)
se <- addIds(se, column="SYMBOL")
# check adding IDs from TxDb/EnsDb (commented out due to Ahub/EnsDb issue)
# se <- addIds(se, column="SYMBOL", fromDb=TRUE)
# check adding CDS
library(SummarizedExperiment)
se2 <- addCDS(se)
# check adding exons
se <- addExons(se)
# check adding CDS on exons
se <- addCDS(se)
# check summarize to gene
gse <- summarizeToGene(se)
expect_error(addExons(gse), "transcript-level")
# check adding IDs
library(org.Dm.eg.db)
gse <- addIds(gse, "REFSEQ", gene=TRUE)
# check retrieving the database
edb <- retrieveDb(se)
# check retrieving the cDNA sequence
if (FALSE) {
# this requires access to Ensembl ftp
cdna <- retrieveCDNA(se)
}
# just a vector of file paths is ok...
# TODO why not throwing warnings on Bioc
#expect_warning({se <- tximeta(files)})
se <- tximeta(files)
# check error on txOut=FALSE
expect_error({se <- tximeta(coldata, txOut=FALSE)},
"transcript-level output")
# check skipping metadata, appropriate warnings
se <- tximeta(coldata, skipMeta=TRUE)
expect_error({summarizeToGene(se)}, "transcriptome metadata")
expect_error({addIds(se)}, "transcriptome metadata")
# check customMetaInfo
se <- tximeta(coldata, customMetaInfo="aux_info/meta_info.json")
expect_error(tximeta(coldata, customMetaInfo="foobar.json"),
"metadata files are missing")
# check make DGEList
library(edgeR)
y <- makeDGEList(se)
})
test_that("tximeta can import GENCODE and Ensembl", {
if (FALSE) {
### GENCODE ###
dir <- system.file("extdata", package="tximportData")
samples <- read.table(file.path(dir,"samples.txt"), header=TRUE)
files <- file.path(dir,"salmon", samples$run, "quant.sf.gz")
coldata <- data.frame(files, names=paste0("sample",1:6))
# with AnnotationHub (default)
se <- tximeta(coldata)
gse <- summarizeToGene(se)
# check adding IDs from TxDb/EnsDb
se <- addIds(se, column="TXTYPE", fromDb=TRUE)
# without AnnotationHub
se <- tximeta(coldata, useHub=FALSE)
### Ensembl ###
dir <- system.file("extdata", package="tximportData")
samples <- read.table(file.path(dir,"samples.txt"), header=TRUE)
files <- file.path(dir,"salmon_gibbs", samples$run, "quant.sf.gz")
coldata <- data.frame(files, names=paste0("sample",1:6))
# with AnnotationHub (default)
se <- tximeta(coldata, dropInfReps=TRUE, useHub=TRUE)
# without AnnotationHub
se <- tximeta(coldata, dropInfReps=TRUE, useHub=FALSE)
gse <- summarizeToGene(se)
# test the mark duplicate code
dir <- system.file("extdata", package="macrophage")
samples <- list.files(file.path(dir, "quants"))[1:3]
files <- file.path(dir,"quants", samples, "quant.sf.gz")
coldata <- data.frame(files, names=paste0("sample",1:3))
se <- tximeta(coldata, dropInfReps=TRUE, markDuplicateTxps=TRUE)
gse <- summarizeToGene(se)
}
})
test_that("tximeta can import inferential replicates", {
# don't want to rely on internet connection for tests...
if (FALSE) {
library(SummarizedExperiment)
# check the GEUVADIS samples with Salmon Gibbs samples
dir <- system.file("extdata", package="tximportData")
samples <- read.table(file.path(dir,"samples.txt"), header=TRUE)
files <- file.path(dir,"salmon_gibbs", samples$run, "quant.sf.gz")
coldata <- data.frame(files, names=paste0("sample",1:6))
se <- tximeta(coldata)
expect_true("infRep1" %in% assayNames(se))
se2 <- tximeta(coldata, varReduce=TRUE)
expect_true("variance" %in% assayNames(se2))
gse <- summarizeToGene(se)
expect_true("infRep1" %in% assayNames(gse))
gse <- summarizeToGene(se, varReduce=TRUE)
expect_true("variance" %in% assayNames(gse))
# check the macrophage dataset with Salmon Gibbs samples
dir <- system.file("extdata", package="macrophage")
coldata <- read.csv(file.path(dir, "coldata.csv"))
coldata$files <- file.path(dir, "quants", coldata$names, "quant.sf.gz")
coldata <- coldata[1:2,]
se <- tximeta(coldata)
se <- tximeta(coldata, skipSeqinfo=TRUE)
}
})
test_that("tximeta can import alevin", {
testthat::skip("There is no alevin file present, skip")
dir <- system.file("extdata", package="tximportData")
samps <- list.files(file.path(dir, "alevin"))
files <- file.path(dir,"alevin",samps[1],"alevin/quants_mat.gz")
file.exists(files)
coldata <- data.frame(files, names="neurons")
#se <- tximeta(coldata, type="alevin")
se <- tximeta(coldata, type="alevin", skipMeta=TRUE)
})
test_that("tximeta can import refseq", {
if (FALSE) {
dir <- system.file("extdata", package="tximportData")
files <- file.path(dir,"refseq/ERR188021/quant.sf.gz")
file.exists(files)
coldata <- data.frame(files, names="A")
# here the important test is if we can pull down seqinfo
# (sequence names and lengths) from RefSeq FTP site
se <- tximeta(coldata)
seqinfo(se)
}
})
test_that("tximeta can import kallisto", {
if (requireNamespace(package="rhdf5", quietly=TRUE)) {
dir <- system.file("extdata", package="tximportData")
samples <- read.table(file.path(dir,"samples.txt"), header=TRUE)
files <- file.path(dir,"kallisto", samples$run, "abundance.tsv.gz")
coldata <- data.frame(files, names=paste0("sample",1:6))
se <- tximeta(coldata, type="kallisto", txOut=TRUE)
# inferential replicates as well
library(SummarizedExperiment)
files <- file.path(dir,"kallisto_boot", samples$run, "abundance.h5")
coldata <- data.frame(files, names=paste0("sample",1:6))
se <- tximeta(coldata, type="kallisto", txOut=TRUE)
expect_true("infRep1" %in% assayNames(se))
se <- tximeta(coldata, type="kallisto", txOut=TRUE, varReduce=TRUE)
expect_true("variance" %in% assayNames(se))
}
})
|