1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145
|
#' Split GRangesList or GRanges based on a list of regions.
#'
#' A GRangesList or GRanges object containing variants is split based on a list of regions.
#' This list can be either a GRangesList or a GRanges object.
#' The result is a GRangesList where each element contains the variants of one sample from one region.
#' Variant that are not in any of the provided region are put in a list of 'other'.
#'
#' @param vcf_list GRangesList or GRanges object
#' @param ranges_grl GRangesList or GRanges object containing regions of interest
#' @param include_other Boolean. Whether or not to include a "Other" region
#' containing mutations that aren't in any other region.
#'
#' @return GRangesList
#' @export
#' @family genomic_regions
#' @importFrom magrittr %>%
#' @examples
#'
#' ## Read in some existing genomic regions.
#' ## See the 'genomic_distribution()' example for how we obtained the
#' ## following data:
#' CTCF_g <- readRDS(system.file("states/CTCF_g_data.rds",
#' package = "MutationalPatterns"
#' ))
#' promoter_g <- readRDS(system.file("states/promoter_g_data.rds",
#' package = "MutationalPatterns"
#' ))
#' flanking_g <- readRDS(system.file("states/promoter_flanking_g_data.rds",
#' package = "MutationalPatterns"
#' ))
#'
#' ## Combine the regions into a single GRangesList
#' regions <- GRangesList(promoter_g, flanking_g, CTCF_g)
#'
#' names(regions) <- c("Promoter", "Promoter flanking", "CTCF")
#'
#' ## Read in some variants.
#' ## See the 'read_vcfs_as_granges()' example for how we obtained the
#' ## following data:
#' grl <- readRDS(system.file("states/read_vcfs_as_granges_output.rds",
#' package = "MutationalPatterns"
#' ))
#'
#' ## Split muts based on the supplied regions
#' split_muts_region(grl, regions)
#'
#' ## Don't include muts outside of the supplied regions
#' split_muts_region(grl, regions, include_other = FALSE)
split_muts_region <- function(vcf_list, ranges_grl, include_other = TRUE) {
# These variables use non standard evaluation.
# To avoid R CMD check complaints we initialize them to NULL.
. <- NULL
# Check arguments
if (inherits(vcf_list, "CompressedGRangesList")) {
if (is.null(names(vcf_list))) {
stop("Please set sample names (without dots) for the vcf_list with `names(vcf_list) = my_names`", call. = FALSE)
}
if (any(stringr::str_detect(names(vcf_list), "\\."))) {
stop("The sample names of the vcf_list should not contain dots. Please fix them with `names(vcf_list) = my_names`", call. = FALSE)
}
}
if (any(stringr::str_detect(names(ranges_grl), "\\."))) {
stop("The names of the ranges_grl should not contain dots. Please fix them with `names(ranges_grl) = my_names`", call. = FALSE)
}
# Turn grl into list.
if (inherits(vcf_list, "CompressedGRangesList")) {
vcf_list <- as.list(vcf_list)
}
# Get muttype per sample
if (inherits(vcf_list, "list")) {
grl <- purrr::map(vcf_list, function(x) .split_muts_region_gr(x, ranges_grl, include_other)) %>%
purrr::map(as.list) %>% # Create a list of lists. Outer layer: samples. Inner layer: regions.
do.call(c, .) %>%
GenomicRanges::GRangesList()
return(grl)
} else if (inherits(vcf_list, "GRanges")) {
grl <- .split_muts_region_gr(vcf_list, ranges_grl, include_other)
return(grl)
} else {
.not_gr_or_grl(vcf_list)
}
}
#' Split a GRanges object based on a list of regions.
#'
#' A GRanges object containing variants is split based on a list of regions.
#' This list can be either a GRangesList or a GRanges object.
#' The result is a GRangesList where each element contains the variants of one sample from one region.
#' Variant that are not in any of the provided region are put in a list of 'other'.
#'
#' @param gr GRanges object containing variants
#' @param ranges_grl GRangesList or GRanges object containing regions of interest
#' @param include_other Boolean. Whether or not to include a "Other" region
#' containing mutations that aren't in any other region.
#' @noRd
#' @return GRangesList
#'
.split_muts_region_gr <- function(gr, ranges_grl, include_other) {
# Get the muts overlapping the ranges_grl
if (inherits(ranges_grl, "CompressedGRangesList")) {
ranges_list <- as.list(ranges_grl)
gr_sub_l <- purrr::map(ranges_list, ~ .get_muts_region(gr, .x))
} else if (inherits(ranges_grl, "GRanges")) {
gr_sub <- .get_muts_region(gr, ranges_grl)
gr_sub_l <- list("In_region" = gr_sub)
} else {
.not_gr_or_grl(ranges_grl)
}
if (include_other) {
# Get the other muts
hits <- GenomicRanges::findOverlaps(gr, ranges_grl)
gr_other <- gr[-S4Vectors::queryHits(hits)]
gr_sub_l[["Other"]] <- gr_other
}
# Combine data
grl <- GenomicRanges::GRangesList(gr_sub_l)
return(grl)
}
#' Split a GRanges object based on a list of regions.
#'
#' A GRanges object containing variants is split based on a list of regions.
#' This list can be either a GRangesList or a GRanges object.
#' The result is a GRangesList where each element contains the variants of one sample from one region.
#' Variant that are not in any of the provided region are put in a list of 'other'.
#'
#' @param gr GRanges object containing variants
#' @param ranges GRanges object containing regions of interest
#' @noRd
#' @return GRanges object
#'
.get_muts_region <- function(gr, ranges) {
hits <- GenomicRanges::findOverlaps(gr, ranges)
gr_sub <- gr[S4Vectors::queryHits(hits)]
return(gr_sub)
}
|