File: filterVcf.R

package info (click to toggle)
r-bioc-variantannotation 1.10.5-1
  • links: PTS, VCS
  • area: main
  • in suites: jessie, jessie-kfreebsd
  • size: 2,172 kB
  • ctags: 109
  • sloc: ansic: 1,088; sh: 4; makefile: 2
file content (97 lines) | stat: -rw-r--r-- 3,547 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
### R code from vignette source 'filterVcf.Rnw'

###################################################
### code chunk number 1: setup
###################################################
options(width=80, prompt = " ", continue = " ")


###################################################
### code chunk number 2: prefilters
###################################################
isGermlinePrefilter <- function(x) {
    grepl("Germline", x, fixed=TRUE)
    }

notInDbsnpPrefilter <- function(x) {
    !(grepl("dbsnp", x, fixed=TRUE))
    }


###################################################
### code chunk number 3: filters
###################################################
isSnp <- function(x) {
    refSnp <- nchar(ref(x)) == 1L
    a <- alt(x)
    altSnp <- elementLengths(a) == 1L
    ai <- unlist(a[altSnp])    # all length 1, so unlisting is 1:1 map
    altSnp[altSnp] <- nchar(ai) == 1L & (ai %in% c("A", "C", "G", "T"))
    refSnp & altSnp
    }

allelicDepth <- function(x) {
    ##  ratio of AD of the "alternate allele" for the tumor sample
    ##  OR "reference allele" for normal samples to total reads for
    ##  the sample should be greater than some threshold (say 0.1,
    ##  that is: at least 10% of the sample should have the allele
    ##  of interest)
    ad <- geno(x)$AD
    tumorPct <- ad[,1,2,drop=FALSE] / rowSums(ad[,1,,drop=FALSE])
    normPct <- ad[,2,1, drop=FALSE] / rowSums(ad[,2,,drop=FALSE])
    test <- (tumorPct > 0.1) | (normPct > 0.1)
    !is.na(test) & test
    }


###################################################
### code chunk number 4: createFilterRules
###################################################
library(VariantAnnotation)
prefilters <- FilterRules(list(germline=isGermlinePrefilter, 
                               dbsnp=notInDbsnpPrefilter))
filters <- FilterRules(list(isSnp=isSnp, AD=allelicDepth))


###################################################
### code chunk number 5: createFilteredFile
###################################################
file.gz     <- system.file("extdata", "chr7-sub.vcf.gz", 
                           package="VariantAnnotation")
file.gz.tbi <- system.file("extdata", "chr7-sub.vcf.gz.tbi", 
                           package="VariantAnnotation")
destination.file <- tempfile()
tabix.file <- TabixFile(file.gz, yieldSize=10000)
filterVcf(tabix.file,  "hg19", destination.file,
          prefilters=prefilters, filters=filters, verbose=TRUE)



###################################################
### code chunk number 6: mcf7regulatoryRegions (eval = FALSE)
###################################################
## library(AnnotationHub)
## hub <- AnnotationHub()
##     # paste operation is only for display purposes on a narrow page
## ctcf.regs <- paste("goldenpath.hg19.encodeDCC.wgEncodeUwTfbs",
##                    "wgEncodeUwTfbsMcf7CtcfStdPkRep1.narrowPeak_0.0.1.RData",
##                   sep=".")
## mcf7.gr <- hub[[ctcf.regs]]


###################################################
### code chunk number 7: findOverlaps (eval = FALSE)
###################################################
## vcf <- readVcf(destination.file, "hg19")
## seqlevels(vcf) <- paste("chr", seqlevels(vcf), sep="")
## ov.mcf7 <- findOverlaps(vcf, mcf7.gr)


###################################################
### code chunk number 8: locateVariant (eval = FALSE)
###################################################
## library(TxDb.Hsapiens.UCSC.hg19.knownGene)
## txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene 
## print(locateVariants(vcf[6,], txdb, AllVariants()))