1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78
|
## ----eval=FALSE---------------------------------------------------------------
# tabix.file <- TabixFile(file.gz, yieldSize=10000)
# filterVcf(tabix.file, genome, destination.file,
# prefilters=prefilters, filters=filters)
## ----prefilters---------------------------------------------------------------
isGermlinePrefilter <- function(x) {
grepl("Germline", x, fixed=TRUE)
}
notInDbsnpPrefilter <- function(x) {
!(grepl("dbsnp", x, fixed=TRUE))
}
## ----filters------------------------------------------------------------------
## We will use isSNV() to filter only SNVs
allelicDepth <- function(x) {
## ratio of AD of the "alternate allele" for the tumor sample
## OR "reference allele" for normal samples to total reads for
## the sample should be greater than some threshold (say 0.1,
## that is: at least 10% of the sample should have the allele
## of interest)
ad <- geno(x)$AD
tumorPct <- ad[,1,2,drop=FALSE] / rowSums(ad[,1,,drop=FALSE])
normPct <- ad[,2,1, drop=FALSE] / rowSums(ad[,2,,drop=FALSE])
test <- (tumorPct > 0.1) | (normPct > 0.1)
as.vector(!is.na(test) & test)
}
## ----createFilterRules, message=FALSE, warning=FALSE--------------------------
library(VariantAnnotation)
prefilters <- FilterRules(list(germline=isGermlinePrefilter,
dbsnp=notInDbsnpPrefilter))
filters <- FilterRules(list(isSNV=isSNV, AD=allelicDepth))
## ----createFilteredFile, message=FALSE, warning=FALSE-------------------------
file.gz <- system.file("extdata", "chr7-sub.vcf.gz",
package="VariantAnnotation")
file.gz.tbi <- system.file("extdata", "chr7-sub.vcf.gz.tbi",
package="VariantAnnotation")
destination.file <- tempfile()
tabix.file <- TabixFile(file.gz, yieldSize=10000)
filterVcf(tabix.file, "hg19", destination.file,
prefilters=prefilters, filters=filters, verbose=TRUE)
## ----mcf7regulatoryRegions, message=FALSE, warning=FALSE----------------------
library(AnnotationHub)
hub <- AnnotationHub()
id <- names(query(hub, "wgEncodeUwTfbsMcf7CtcfStdPkRep1.narrowPeak"))
mcf7.gr <- hub[[tail(id, 1)]]
## ----findOverlaps-------------------------------------------------------------
vcf <- readVcf(destination.file, "hg19")
seqlevels(vcf) <- paste("chr", seqlevels(vcf), sep="")
ov.mcf7 <- findOverlaps(vcf, mcf7.gr)
## ----locateVariant, message=FALSE, warning=FALSE------------------------------
library(TxDb.Hsapiens.UCSC.hg19.knownGene)
txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene
locateVariants(vcf[6,], txdb, AllVariants())
## ----eval=FALSE---------------------------------------------------------------
# library(VariantAnnotation)
# file.gz <- "somaticVcfBeta-HCC1187-H-200-37-ASM-T1-N1.vcf.gz"
# stopifnot(file.exists(file.gz))
# file.gz.tbi <- paste(file.gz, ".tbi", sep="")
# if(!(file.exists(file.gz.tbi)))
# indexTabix(file.gz, format="vcf")
# start.loc <- 55000000
# end.loc <- 56000000
# chr7.gr <- GRanges("7", IRanges(start.loc, end.loc))
# params <- ScanVcfParam(which=chr7.gr)
# vcf <- readVcf(TabixFile(file.gz), "hg19", params)
# writeVcf(vcf, "chr7-sub.vcf")
# bgzip("chr7-sub.vcf", overwrite=TRUE)
# indexTabix("chr7-sub.vcf.gz", format="vcf")
|