1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300
|
Author: Andreas Tille <tille@debian.org>
Last-Update: Thu, 24 Jul 2014 08:35:54 +0200
Description: Remove unit tests that depend on TxDb.Hsapiens.UCSC.hg19.knownGene
 and other databases that are not packaged for Debian (see debian/README.test)
--- a/inst/unitTests/test_locateVariants-methods.R
+++ /dev/null
@@ -1,136 +0,0 @@
-library(TxDb.Hsapiens.UCSC.hg19.knownGene)
-txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene
-cdsbytx <- cdsBy(txdb, use.names=TRUE)
-intbytx <- intronsByTranscript(txdb)
-txbygene <- transcriptsBy(txdb, "gene")
-
-gr <- GRanges("chr22",
- IRanges(c(16268137, 16287254, 16190792, 16164570,
- 18209442, 18121652, 24314750, 25508661),
- width=c(1,1,1,1,3,3,2,2)),
- strand=c("-", "-", "-", "+", "+", "+", "+", "+"))
-
-test_locateVariants_upstream_downstream <- function()
-{
- loc <- locateVariants(gr, txdb, IntergenicVariants(1, 1))
- target <- CharacterList(character(), character())
- checkIdentical(loc$FOLLOWID, target)
-
- loc <- locateVariants(gr, txbygene, IntergenicVariants(2, 2))
- target <- CharacterList(character(), "100037417")
- checkIdentical(loc$FOLLOWID, target)
-
- loc <- locateVariants(gr, txbygene, IntergenicVariants(100000, 100000))
- target <- CharacterList("23784", c("100037417","4282", "66035"))
- checkIdentical(loc$FOLLOWID, target)
- target <- CharacterList(character(), c("23523", "2953", "391322"))
- checkIdentical(loc$PRECEDEID, target)
-}
-
-test_locateVariants_queryAsVCF <- function()
-{
- fl <- system.file("extdata", "gl_chr1.vcf", package="VariantAnnotation")
- vcf <- readVcf(fl, "hg19")
- vcf <- renameSeqlevels(vcf, c("1" = "chr1"))
- loc1 <- locateVariants(vcf, txdb, IntergenicVariants())
- loc2 <- locateVariants(rowRanges(vcf), txdb, IntergenicVariants())
- checkIdentical(loc1, loc2)
-}
-
-test_locateVariants_ignore.strand <- function()
-{
- cdsbytx <- cdsbytx[1:5]
- gr <- GRanges("chr1", IRanges(c(12190, 12595, 13403), width=1), "-")
- loc1 <- locateVariants(gr, cdsbytx, CodingVariants(),
- ignore.strand=TRUE)
- checkIdentical(c(1L, 2L, 3L), mcols(loc1)$QUERYID)
- loc2 <- locateVariants(gr, cdsbytx, CodingVariants(),
- ignore.strand=FALSE)
- checkIdentical(integer(), mcols(loc2)$QUERYID)
- loc1 <- locateVariants(gr, cdsbytx, SpliceSiteVariants(),
- ignore.strand=TRUE)
- checkIdentical(c(1L, 2L, 3L), mcols(loc1)$QUERYID)
- loc2 <- locateVariants(gr, cdsbytx, SpliceSiteVariants(),
- ignore.strand=FALSE)
- checkIdentical(integer(), mcols(loc2)$QUERYID)
-}
-
-test_locateVariants_asHits <- function()
-{
- gr <- GRanges("chr1", IRanges(c(12190, 69091, 13403), width=1))
- loc <- locateVariants(gr, cdsbytx, CodingVariants())
- hit <- locateVariants(gr, cdsbytx, CodingVariants(), asHits=TRUE)
- ## annotation element
- loc_nms <- as.character(mcols(loc)$TXID)
- hit_nms <- names(cdsbytx[subjectHits(hit)])
- checkIdentical(loc_nms, hit_nms)
-
- ## Hits lengths
- checkIdentical(length(gr), queryLength(hit))
- checkIdentical(length(cdsbytx), subjectLength(hit))
-}
-
-.extract <- function(x, col) as.vector(mcols(x)[[col]])
-test_locateVariants_PromoterVariants <- function()
-{
- s <- GRangesList(GRanges("chr1", IRanges(10, width=11), "+"),
- GRanges("chr1", IRanges(30, width=11) , "+"))
- ## empty
- q <- GRanges("chr1", IRanges(15, width=1), "+")
- current <- locateVariants(q, s, PromoterVariants(5, 5))
- checkTrue(length(current) == 0)
-
- ## endpoint
- q <- GRanges("chr1", IRanges(20, width=1), "+")
- current <- locateVariants(q, s, PromoterVariants(5, 5))
- checkTrue(length(current) == 0)
-
- ## strand
- q <- GRanges(c("chr1", "chr1"), IRanges(c(8, 12), width=1), "+")
- current <- locateVariants(q, s, PromoterVariants(5, 5))
- checkEquals(c(1L, 2L), .extract(current, "QUERYID"))
- strand(s) <- RleList(Rle(factor("*")), Rle(factor("*")))
- strand(q) <- "*"
- current <- suppressWarnings(locateVariants(q, s, PromoterVariants(5, 5)))
- checkEquals(c(1L, 2L), .extract(current, "QUERYID"))
- q <- GRanges(c("chr1", "chr1"), IRanges(c(21, 41), width=1), "-")
- strand(s) <- RleList(Rle(factor("-")), Rle(factor("-")))
- current <- locateVariants(q, s, PromoterVariants(5, 5))
- checkEquals(c(1L, 2L), .extract(current, "QUERYID"))
-
- q <- GRanges(c("chr2", "chr2"), IRanges(c(9, 10), width=1), "+")
- s <- GRangesList(GRanges("chr2", IRanges(10, width=11), "+"))
- current <- locateVariants(q, s, PromoterVariants(5, 0))
- checkEquals(1L, .extract(current, "QUERYID"))
- current <- locateVariants(q, s, PromoterVariants(5, 1))
- checkEquals(c(1L, 2L), .extract(current, "QUERYID"))
- current <- locateVariants(q, s, PromoterVariants(0, 0))
- checkTrue(length(current) == 0L)
-
- q <- GRanges("chr22", IRanges(50310410, 50310420))
- current <- locateVariants(q, txdb, PromoterVariants())
- checkIdentical(unique(current$GENEID), "79174")
-}
-
-test_locateVariants_match_predictCoding <- function()
-{
- library(BSgenome.Hsapiens.UCSC.hg19)
- gr <- GRanges("chr20", IRanges(
- start=c(77055, 77054, 77054, 77058, 77057, 77057, 77055),
- end=c(77055, 77055, 77055, 77058, 77058, 77058, 77054)),
- paramRangeID=rep(NA, 7))
- fixed <- DataFrame(
- REF=DNAStringSet(c('T', 'AT', 'AT', 'A', 'AA', 'AA', 'T')),
- ALT=DNAStringSetList('G', 'A', 'ATT', 'G', 'A', 'AAT', 'G'),
- QUAL=70, FILTER="PASS")
- vcf <- VCF(rowRanges=gr, fixed=fixed)
-
- ## coding regions match, zero-width
- loc <- locateVariants(vcf, txdb, CodingVariants())
- coding <- predictCoding(vcf, txdb, Hsapiens)
- checkIdentical(loc$QUERYID, as.integer(1:7))
- checkIdentical(length(coding) , 6L)
- checkIdentical(loc$CDSID[1:6], coding$CDSID)
- checkIdentical(unname(as.character(coding$VARCODON[c(1,4)])),
- as.character(DNAStringSet(c("AAG", "TAG"))))
-}
--- a/inst/unitTests/test_SIFTandPolyPhen.R
+++ /dev/null
@@ -1,39 +0,0 @@
-library(SIFT.Hsapiens.dbSNP132)
-library(PolyPhen.Hsapiens.dbSNP131)
-quiet <- suppressWarnings
-
-test_SIFT_132 <- function()
-{
- db <- SIFT.Hsapiens.dbSNP132
- scol <- columns(db)
- checkIdentical(length(scol), 10L)
-
- res <- select(db, "rs2142947")
- checkIdentical(nrow(res), 4L)
-
- res <- select(db, "rs2142947", columns="AACHANGE")
- checkIdentical(nrow(res), 1L)
-
- res <-
- quiet(select(db, keys=c("rs17970171", "INVALID", "rs17970171")))
- checkIdentical(nrow(res), 9L)
- checkTrue(all(res$RSID %in% c("rs17970171", "INVALID")))
-}
-
-test_PolyPhen <- function()
-{
- db <- PolyPhen.Hsapiens.dbSNP131
- pcol <- columns(db)
- checkIdentical(length(pcol), 58L)
-
- res <- select(db, "rs3026284")
- checkIdentical(nrow(res), 2L)
-
- res <- select(db, "rs3026284", columns="POS")
- checkIdentical(nrow(res), 1L)
-
- res <-
- suppressWarnings(select(db, keys=c("rs3026284", "INVALID", "rs3026284")))
- checkIdentical(nrow(res), 5L)
- checkTrue(all(res$RSID %in% c("rs3026284", "INVALID")))
-}
--- a/inst/unitTests/test_predictCoding-methods.R
+++ /dev/null
@@ -1,111 +0,0 @@
-quiet <- suppressWarnings
-library(BSgenome.Hsapiens.UCSC.hg19)
-fun <- VariantAnnotation:::.predictCodingGRangesList
-cdsbytx <- GRangesList(tx1=GRanges(seqnames="chr1",
- IRanges(c(10001, 10010), width=5),
- strand="+"),
- tx2=GRanges(seqnames="chr1",
- IRanges(c(10100, 10001), width=5),
- strand="-"),
- tx3=GRanges(seqnames="chr1",
- IRanges(c(10010, 10001), width=5),
- strand="-"))
-
-test_predictCoding_empty <- function()
-{
- query <- GRanges("chr1", IRanges(start=c(1, 10, 20), width=1))
- current <- fun(query, cdsbytx, Hsapiens, DNAStringSet(c("G", "T", "A")))
- checkIdentical(dim(mcols(current)), c(0L, 8L))
-}
-
-test_predictCoding_varAllele <- function()
-{
- variant=DNAStringSet(c("G", "", "C", "AA", "GGA"))
- query <- GRanges(seqnames="chr1",
- ranges=IRanges(c(rep(10003, 3), 10011, 10101),
- width=c(1, 1, 1, 2, 3)),
- strand=c("+", "-", "*", "*", "*"),
- variant=variant)
- names(query) <- LETTERS[1:5]
- current <- quiet(fun(query, cdsbytx[1:2], Hsapiens, variant))
-
- current_varaa <- values(current[names(current) == "B"])[["VARAA"]]
- checkTrue(as.character(current_varaa) == "")
-
- current_consequence <-
- values(current[names(current) == "B"])[["CONSEQUENCE"]]
- checkTrue(current_consequence == "not translated")
-
- variant=DNAStringSet(c("GGA", "GGA"))
- query <- GRanges("chr1", IRanges(rep(10101, 2), width=c(2,3)),
- variant=variant)
- current <- quiet(fun(query, cdsbytx[1:2], Hsapiens, variant))
- checkIdentical(unname(as.character(mcols(current)$VARCODON)),
- c("TATCCGG", "TTCCGG"))
-}
-
-test_mapToTranscripts <- function()
-{
- ## both in 'first' cds
- query <- GRanges(seqnames="chr1",
- ranges=IRanges(rep(c(10002, 10005), 2), width=1),
- strand=c("+", "+", "-", "-"))
- current <- mapToTranscripts(query, cdsbytx[c(1,3)], ignore.strand=FALSE)
- expected <- IRanges(c(2, 5, 9, 6), width=1)
- checkIdentical(ranges(current), expected)
-
- ## one in each cds
- query <- GRanges(seqnames="chr1",
- ranges=IRanges(rep(c(10002, 10011), 2), width=1),
- strand=c("+", "+", "-", "-"))
- current <- mapToTranscripts(query, cdsbytx[c(1,3)], ignore.strand=FALSE)
- expected <- IRanges(c(2, 7, 9, 4), width=1)
- checkIdentical(ranges(current), expected)
-
- ## both in 'last' cds
- query <- GRanges(seqnames="chr1",
- ranges=IRanges(rep(c(10010, 10013), 2), width=1),
- strand=c("+", "+", "-", "-"))
- current <- mapToTranscripts(query, cdsbytx[c(1,3)], ignore.strand=FALSE)
- expected <- IRanges(c(6, 9, 5, 2), width=1)
- checkIdentical(ranges(current), expected)
-}
-
-test_predictCoding_strand <- function()
-{
- variant=DNAStringSet(c("G", "G", "C", "T", "G"))
- query <- GRanges(seqnames="chr1",
- ranges=IRanges(c(rep(10003, 3), 10011, 10101), width=1),
- strand=c("+", "-", "*", "*", "*"),
- variant=variant)
- names(query) <- LETTERS[1:5]
-
- current <- quiet(fun(query, cdsbytx, Hsapiens, variant))
- expected <- c("G", "C", "C", "C", "G", "G", "T", "A", "C")
- checkIdentical(as.character(mcols(current)$varAllele), expected)
-
- ## query "+", subject "-"
- v <- variant[2]
- q <- query[2]
- strand(q) <- "+"
- s <- cdsbytx[3]
- current <- quiet(fun(q, s, Hsapiens, v, ignore.strand=FALSE))
- checkIdentical(length(current), 0L)
-
- current <- quiet(fun(q, s, Hsapiens, v, ignore.strand=TRUE))
- checkIdentical(as.character(mcols(current)$REFAA), "V")
- checkIdentical(as.character(mcols(current)$VARAA), "A")
- checkIdentical(mcols(current)$CDSLOC, IRanges(8, 8))
-
- ## query "-", subject "+"
- strand(q) <- "-"
- s <- cdsbytx[1]
- current <- quiet(fun(q, s, Hsapiens, v, ignore.strand=FALSE))
- checkIdentical(length(current), 0L)
-
- current <- quiet(fun(q, s, Hsapiens, v, ignore.strand=TRUE))
- checkIdentical(as.character(mcols(current)$REFAA), "*")
- checkIdentical(as.character(mcols(current)$VARAA), "*")
- checkIdentical(mcols(current)$CDSLOC, IRanges(3, 3))
-}
-
|