1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138
|
test_filterVcf_TabixFile <- function()
{
fl <- system.file("extdata", "chr22.vcf.gz", package="VariantAnnotation")
tbx <- TabixFile(fl, yieldSize=5000)
dest <- tempfile()
filt <- FilterRules(list(fun=function(...) TRUE))
ans <- filterVcf(tbx, "hg19", dest, filters=filt, verbose=FALSE)
checkIdentical(dest, ans)
vcf0 <- readVcf(fl, "hg19")
vcf1 <- readVcf(dest, "hg19")
checkIdentical(dim(vcf0), dim(vcf1))
}
test_filterVcf_filter <- function()
{
fl <- system.file("extdata", "chr22.vcf.gz", package="VariantAnnotation")
tbx <- TabixFile(fl, yieldSize=5000)
filt <- FilterRules(list(filt1=function(x) {
rowSums(geno(x)$DS > 0.5) > 0
}))
dest <- tempfile()
ans <- filterVcf(tbx, "hg19", dest, filters=filt, verbose=FALSE)
vcf0 <- subsetByFilter(readVcf(fl, "hg19"), filt)
vcf1 <- readVcf(ans, "hg19")
checkIdentical(dim(vcf0), dim(vcf1))
}
test_filterVcf_prefilter_only <- function()
{
fl <- system.file("extdata", "chr22.vcf.gz", package="VariantAnnotation")
tbx <- TabixFile(fl, yieldSize=5000)
filt <- FilterRules(list(filt1=function(x) {
grepl("LOWCOV", x, fixed=TRUE)
}))
dest <- tempfile()
ans <- filterVcf(tbx, "hg19", dest, prefilters=filt, verbose=FALSE)
vcf <- readVcf(fl, "hg19")
idx <- any(info(vcf)$SNPSOURCE == "LOWCOV")
vcf0 <- vcf[!is.na(idx) & idx,]
vcf1 <- readVcf(ans, "hg19")
checkIdentical(dim(vcf0), dim(vcf1))
}
test_filterOnSomaticStatus <- function()
{
f <- system.file("extdata", "h1187-10k.vcf.gz", package="VariantAnnotation")
vcf <- readVcf(f, "hg19")
somaticStatus <- as.list(table(info(vcf)$SS))
checkEquals(somaticStatus, list(Germline=103, LOH=1, Somatic=4))
somaticStatusGermlineFilter <- function(x){
!is.na(info(x)$SS) & info(x)$SS=="Germline"
}
# filter the in-memory data structure
filters <- FilterRules(list(filter.1=somaticStatusGermlineFilter))
checkEquals(dim(subsetByFilter(vcf, filters)), c(103, 2))
}
test_prefilterOnSomaticStatus <- function()
{
f <- system.file("extdata", "h1187-10k.vcf.gz", package="VariantAnnotation")
tabix.file <- TabixFile(f, yieldSize=1000)
isGermline=function(x) {
grepl("Germline", x, fixed=TRUE)
}
prefilteringFunctions <- FilterRules(list(isGermline=isGermline))
filtered.filename <- filterVcf(tabix.file, "hg19", tempfile(),
prefilters=prefilteringFunctions,
verbose=FALSE)
checkEquals(nrow(readVcf(filtered.filename, "hg19")), 103)
}
test_filterOnSnps <- function()
{
f <- system.file("extdata", "h1187-10k.vcf.gz", package="VariantAnnotation")
tabix.file <- TabixFile(f, yieldSize=1000)
# filter only on snp
isSnp=function(x) {
refSnp <- nchar(ref(x)) == 1L
a <- alt(x)
altSnp <- elementLengths(a) == 1L
ai <- unlist(a[altSnp]) # all length 1, so unlisting is 1:1 map
altSnp[altSnp] <- nchar(ai) == 1L & (ai %in% c("A", "C", "G", "T"))
refSnp & altSnp
}
filteringFunctions <- FilterRules(list(isSnp=isSnp))
filtered.filename <- filterVcf(tabix.file, "hg19", tempfile(),
filters=filteringFunctions,
verbose=FALSE)
# now check the results
vcf.germline <- readVcf(filtered.filename, "hg19") #
checkEquals(nrow(vcf.germline), 186)
}
test_prefilterOnSomaticStatusThenFilterOnSnps <- function()
{
f <- system.file("extdata", "h1187-10k.vcf.gz", package="VariantAnnotation")
tabix.file <- TabixFile(f, yieldSize=1000)
isGermline=function(x) {
grepl("Germline", x, fixed=TRUE)
}
prefilteringFunctions <- FilterRules(list(isGermline=isGermline))
isSnp=function(x) {
refSnp <- nchar(ref(x)) == 1L
a <- alt(x)
altSnp <- elementLengths(a) == 1L
ai <- unlist(a[altSnp]) # all length 1, so unlisting is 1:1 map
altSnp[altSnp] <- nchar(ai) == 1L & (ai %in% c("A", "C", "G", "T"))
refSnp & altSnp
}
filteringFunctions <- FilterRules(list(isSnp=isSnp))
# now filter on both. should be 98 rows
filtered.filename <- filterVcf(tabix.file, "hg19", tempfile(),
prefilters=prefilteringFunctions,
filters=filteringFunctions,
verbose=FALSE)
# now check the results
vcf.germline.snp <- readVcf(filtered.filename, "hg19") #
checkEquals(nrow(vcf.germline.snp), 98)
}
|