1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123
|
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/genomic_distribution.R
\name{genomic_distribution}
\alias{genomic_distribution}
\title{Find overlaps between mutations and a genomic region.}
\usage{
genomic_distribution(vcf_list, surveyed_list, region_list)
}
\arguments{
\item{vcf_list}{GRangesList or GRanges object.}
\item{surveyed_list}{A GRangesList or a list with GRanges of regions of
the genome that have been surveyed (e.g. determined using GATK CallableLoci).}
\item{region_list}{A GRangesList or a list with GRanges objects containing
locations of genomic regions.}
}
\value{
A data.frame containing the number observed and number of expected
mutations in each genomic region.
}
\description{
Function finds the number of mutations that reside in genomic region and
takes surveyed area of genome into account.
}
\examples{
## See the 'read_vcfs_as_granges()' example for how we obtained the
## following data:
vcfs <- readRDS(system.file("states/read_vcfs_as_granges_output.rds",
package = "MutationalPatterns"
))
## Use biomaRt to obtain data.
## We can query the BioMart database, but this may take a long time,
## so we take some shortcuts by loading the results from our
## examples. The corresponding code for downloading the data can be
## found above the command we run.
# mart="ensemble"
# library(biomaRt)
# regulatory <- useEnsembl(biomart="regulation",
# dataset="hsapiens_regulatory_feature",
# GRCh = 37)
regulatory <- readRDS(system.file("states/regulatory_data.rds",
package = "MutationalPatterns"
))
## Download the regulatory CTCF binding sites and convert them to
## a GRanges object.
# CTCF <- getBM(attributes = c('chromosome_name',
# 'chromosome_start',
# 'chromosome_end',
# 'feature_type_name'),
# filters = "regulatory_feature_type_name",
# values = "CTCF Binding Site",
# mart = regulatory)
#
# CTCF_g <- reduce(GRanges(CTCF$chromosome_name,
# IRanges(CTCF$chromosome_start,
# CTCF$chromosome_end)))
CTCF_g <- readRDS(system.file("states/CTCF_g_data.rds",
package = "MutationalPatterns"
))
## Download the promoter regions and conver them to a GRanges object.
# promoter = getBM(attributes = c('chromosome_name', 'chromosome_start',
# 'chromosome_end', 'feature_type_name'),
# filters = "regulatory_feature_type_name",
# values = "Promoter",
# mart = regulatory)
# promoter_g = reduce(GRanges(promoter$chromosome_name,
# IRanges(promoter$chromosome_start,
# promoter$chromosome_end)))
promoter_g <- readRDS(system.file("states/promoter_g_data.rds",
package = "MutationalPatterns"
))
# flanking = getBM(attributes = c('chromosome_name',
# 'chromosome_start',
# 'chromosome_end',
# 'feature_type_name'),
# filters = "regulatory_feature_type_name",
# values = "Promoter Flanking Region",
# mart = regulatory)
# flanking_g = reduce(GRanges(
# flanking$chromosome_name,
# IRanges(flanking$chromosome_start,
# flanking$chromosome_end)))
flanking_g <- readRDS(system.file("states/promoter_flanking_g_data.rds",
package = "MutationalPatterns"
))
regions <- GRangesList(promoter_g, flanking_g, CTCF_g)
names(regions) <- c("Promoter", "Promoter flanking", "CTCF")
# Use a naming standard consistently.
seqlevelsStyle(regions) <- "UCSC"
## Get the filename with surveyed/callable regions
surveyed_file <- system.file("extdata/callableloci-sample.bed",
package = "MutationalPatterns"
)
## Import the file using rtracklayer and use the UCSC naming standard
library(rtracklayer)
surveyed <- import(surveyed_file)
seqlevelsStyle(surveyed) <- "UCSC"
## For this example we use the same surveyed file for each sample.
surveyed_list <- rep(list(surveyed), 9)
## Calculate the number of observed and expected number of mutations in
## each genomic regions for each sample.
distr <- genomic_distribution(vcfs, surveyed_list, regions)
}
\seealso{
\code{\link{read_vcfs_as_granges}}
}
|