File: read_vcfs_as_granges.Rd

package info (click to toggle)
r-bioc-mutationalpatterns 3.0.1%2Bdfsg-2
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 5,908 kB
  • sloc: sh: 8; makefile: 2
file content (100 lines) | stat: -rw-r--r-- 3,538 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/read_vcfs_as_granges.R
\name{read_vcfs_as_granges}
\alias{read_vcfs_as_granges}
\title{Read VCF files into a GRangesList}
\usage{
read_vcfs_as_granges(
  vcf_files,
  sample_names,
  genome,
  group = c("auto+sex", "auto", "sex", "circular", "all", "none"),
  type = c("snv", "indel", "dbs", "mbs", "all"),
  change_seqnames = TRUE
)
}
\arguments{
\item{vcf_files}{Character vector of VCF file names}

\item{sample_names}{Character vector of sample names}

\item{genome}{A string matching the name of a BSgenome library
corresponding to the reference genome of your VCFs}

\item{group}{Selector for a seqlevel group.  All seqlevels outside
of this group will be removed.  Possible values:
* 'all' for all chromosomes;
* 'auto' for autosomal chromosomes;
* 'sex' for sex chromosomes;
* 'auto+sex' for autosomal + sex chromosomes (default);
* 'circular' for circular chromosomes;
* 'none' for no filtering, which results in keeping all
  seqlevels from the VCF file.}

\item{type}{The mutation type that will be loaded. All other variants will be filtered out.
            Possible values:
            * 'snv'
            * 'indel'
            * 'dbs'
            * 'mbs'
            * 'all'
This function assumes that dbs and mbs variants are present in the vcf as SNVs,
which are positioned next to each other. If your dbs/mbs variants are called
separately you should use type = 'all' to prevent incorrect filtering.
In those cases SNVs could be selected per sample by something like:
'gr[width(gr) == 1]'}

\item{change_seqnames}{Boolean. Whether to change the seqnamesStyle of the vcf
to that of the BSgenome object. (default = TRUE)}
}
\value{
A GRangesList containing the GRanges obtained from 'vcf_files'
}
\description{
This function reads Variant Call Format (VCF) files into a GRanges object
and combines them in a GRangesList.  In addition to loading the files, this
function applies the same seqlevel style to the GRanges objects as the
reference genome passed in the 'genome' parameter.
}
\examples{
## The example data set consists of three colon samples, three intestine
## samples and three liver samples.  So, to map each file to its appropriate
## sample name, we create a vector containing the sample names:
sample_names <- c(
  "colon1", "colon2", "colon3",
  "intestine1", "intestine2", "intestine3",
  "liver1", "liver2", "liver3"
)

## We assemble a list of files we want to load.  These files match the
## sample names defined above.
vcf_files <- list.files(system.file("extdata",
  package = "MutationalPatterns"
),
pattern = "sample.vcf", full.names = TRUE
)

## Get a reference genome BSgenome object.
ref_genome <- "BSgenome.Hsapiens.UCSC.hg19"
library("BSgenome")
library(ref_genome, character.only = TRUE)

## This function loads the files as GRanges objects.
## For backwards compatability reasons it only loads SNVs by default
vcfs <- read_vcfs_as_granges(vcf_files, sample_names, ref_genome)

## To load all variant types use:
vcfs <- read_vcfs_as_granges(vcf_files, sample_names, ref_genome, type = "all")

## Loading only indels can be done like this.

## Select data containing indels.
vcf_fnames <- list.files(system.file("extdata", package = "MutationalPatterns"),
  pattern = "blood.*vcf", full.names = TRUE
)
sample_names <- c("AC", "ACC55", "BCH")

## Read data and select only the indels.
## Other mutation types can be read in the same way.
read_vcfs_as_granges(vcf_fnames, sample_names, ref_genome, type = "indel")
}