File: test_filterVcf.R

package info (click to toggle)
r-bioc-variantannotation 1.10.5-1
  • links: PTS, VCS
  • area: main
  • in suites: jessie, jessie-kfreebsd
  • size: 2,172 kB
  • ctags: 109
  • sloc: ansic: 1,088; sh: 4; makefile: 2
file content (138 lines) | stat: -rw-r--r-- 4,649 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
test_filterVcf_TabixFile <- function()
{
    fl <- system.file("extdata", "chr22.vcf.gz", package="VariantAnnotation")
    tbx <- TabixFile(fl, yieldSize=5000)
    dest <- tempfile()
    filt <- FilterRules(list(fun=function(...) TRUE))
    ans <- filterVcf(tbx, "hg19", dest, filters=filt, verbose=FALSE)
 
    checkIdentical(dest, ans)
    vcf0 <- readVcf(fl, "hg19")
    vcf1 <- readVcf(dest, "hg19")
    checkIdentical(dim(vcf0), dim(vcf1))
}

test_filterVcf_filter <- function()
{
    fl <- system.file("extdata", "chr22.vcf.gz", package="VariantAnnotation")
    tbx <- TabixFile(fl, yieldSize=5000)

    filt <- FilterRules(list(filt1=function(x) {
        rowSums(geno(x)$DS > 0.5) > 0
    }))
    dest <- tempfile()

    ans <- filterVcf(tbx, "hg19", dest, filters=filt, verbose=FALSE)
    vcf0 <- subsetByFilter(readVcf(fl, "hg19"), filt)
    vcf1 <- readVcf(ans, "hg19")
    checkIdentical(dim(vcf0), dim(vcf1))
}

test_filterVcf_prefilter_only <- function()
{
    fl <- system.file("extdata", "chr22.vcf.gz", package="VariantAnnotation")
    tbx <- TabixFile(fl, yieldSize=5000)

    filt <- FilterRules(list(filt1=function(x) {
        grepl("LOWCOV", x, fixed=TRUE)
    }))
    dest <- tempfile()

    ans <- filterVcf(tbx, "hg19", dest, prefilters=filt, verbose=FALSE)

    vcf <- readVcf(fl, "hg19")
    idx <- any(info(vcf)$SNPSOURCE == "LOWCOV")
    vcf0 <- vcf[!is.na(idx) & idx,]
    vcf1 <- readVcf(ans, "hg19")
    checkIdentical(dim(vcf0), dim(vcf1))
}

test_filterOnSomaticStatus <- function()
{
    f <- system.file("extdata", "h1187-10k.vcf.gz", package="VariantAnnotation")
    vcf <- readVcf(f, "hg19")
    somaticStatus <- as.list(table(info(vcf)$SS))
    checkEquals(somaticStatus, list(Germline=103, LOH=1, Somatic=4))

    somaticStatusGermlineFilter <- function(x){
        !is.na(info(x)$SS) & info(x)$SS=="Germline"
        }
 
        # filter the in-memory data structure
    filters <- FilterRules(list(filter.1=somaticStatusGermlineFilter))
    checkEquals(dim(subsetByFilter(vcf, filters)), c(103, 2))
}

test_prefilterOnSomaticStatus <- function()
{
    f <- system.file("extdata", "h1187-10k.vcf.gz", package="VariantAnnotation")
    tabix.file <- TabixFile(f, yieldSize=1000)

    isGermline=function(x) {
        grepl("Germline", x, fixed=TRUE)
        }

    prefilteringFunctions <- FilterRules(list(isGermline=isGermline))
    filtered.filename <- filterVcf(tabix.file, "hg19", tempfile(),
                                   prefilters=prefilteringFunctions,
                                   verbose=FALSE)

    checkEquals(nrow(readVcf(filtered.filename, "hg19")), 103)
} 

test_filterOnSnps <- function()
{
    f <- system.file("extdata", "h1187-10k.vcf.gz", package="VariantAnnotation")
    tabix.file <- TabixFile(f, yieldSize=1000)

       # filter only on snp
    isSnp=function(x) {
        refSnp <- nchar(ref(x)) == 1L
        a <- alt(x)
        altSnp <- elementLengths(a) == 1L
        ai <- unlist(a[altSnp])    # all length 1, so unlisting is 1:1 map
        altSnp[altSnp] <- nchar(ai) == 1L & (ai %in% c("A", "C", "G", "T"))
        refSnp & altSnp
        }
 
    filteringFunctions <- FilterRules(list(isSnp=isSnp))
    filtered.filename <- filterVcf(tabix.file, "hg19", tempfile(),
                                   filters=filteringFunctions,
                                   verbose=FALSE)
      # now check the results
    vcf.germline <- readVcf(filtered.filename, "hg19")   # 
    checkEquals(nrow(vcf.germline), 186)
}

test_prefilterOnSomaticStatusThenFilterOnSnps <- function()
{
    f <- system.file("extdata", "h1187-10k.vcf.gz", package="VariantAnnotation")
    tabix.file <- TabixFile(f, yieldSize=1000)

    isGermline=function(x) {
        grepl("Germline", x, fixed=TRUE)
        }

    prefilteringFunctions <- FilterRules(list(isGermline=isGermline))

    isSnp=function(x) {
        refSnp <- nchar(ref(x)) == 1L
        a <- alt(x)
        altSnp <- elementLengths(a) == 1L
        ai <- unlist(a[altSnp])    # all length 1, so unlisting is 1:1 map
        altSnp[altSnp] <- nchar(ai) == 1L & (ai %in% c("A", "C", "G", "T"))
        refSnp & altSnp
        }
 
    filteringFunctions <- FilterRules(list(isSnp=isSnp))
 
        # now filter on both.  should be 98 rows
    filtered.filename <- filterVcf(tabix.file, "hg19", tempfile(),
                                   prefilters=prefilteringFunctions,
                                   filters=filteringFunctions,
                                   verbose=FALSE)

      # now check the results
    vcf.germline.snp <- readVcf(filtered.filename, "hg19")   # 
    checkEquals(nrow(vcf.germline.snp), 98)
}