1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130
|
#' Plot the indel contexts
#'
#' @details
#' Plots the number of indels per COSMIC context for each sample.
#' It takes a tibble with counts as its input. This tibble can be generated by 'count_indel_contexts()'.
#' Each sample is plotted in a separate facet.
#' The same y axis can be used for all samples or a separate y axis can be used.
#' The facets at the top show the indel types. First the C and T deletions,
#' then the C and T insertions. Next are the multi base deletions and insertions.
#' Finally the deletions with microhomology (mh) are shown.
#' The x-axis at the bottom shows the number of repeat units.
#' For mh deletions the microhomology length is shown.
#'
#' @param counts A tibble containing the number of indels per COSMIC context.
#' @param same_y A boolean describing whether the same y axis should be used for all samples.
#' @param extra_labels A boolean describing whether extra labels should be added.
#' These can clarify the plot, but will shift when different plot widths are used.
#' We recommend saving a plot with a width of 12, when using this argument.
#' @param condensed A boolean describing whether a more condensed plotting format should be used. Default = FALSE.
#'
#' @return A ggplot figure.
#'
#' @examples
#' ## Get the indel counts
#' ## See 'count_indel_contexts()' for more info on how to do this.
#' indel_counts <- readRDS(system.file("states/blood_indel_counts.rds",
#' package = "MutationalPatterns"
#' ))
#'
#' ## Plot contexts
#' plot_indel_contexts(indel_counts)
#'
#' ## Use the same y axis for all samples.
#' plot_indel_contexts(indel_counts, same_y = TRUE)
#'
#' ## Add extra labels to make plot clearer
#' plot_indel_contexts(indel_counts, extra_labels = TRUE)
#'
#' ## Create a more condensed plot
#' plot_indel_contexts(indel_counts, condensed = TRUE)
#' @import ggplot2
#' @importFrom magrittr %>%
#' @family Indels
#'
#' @seealso \code{\link{count_indel_contexts}}, \code{\link{plot_main_indel_contexts}}
#'
#' @export
plot_indel_contexts <- function(counts, same_y = FALSE, extra_labels = FALSE, condensed = FALSE) {
  # The column names below are referenced through non standard evaluation.
  # Binding them to NULL keeps R CMD check from reporting undefined globals.
  count <- muttype <- muttype_sub <- muttype_total <- sample <- NULL

  # Move the row names into a column and split each one, at the underscore
  # that precedes a digit, into the indel type and its sub-context.
  wide <- tibble::rownames_to_column(as.data.frame(counts), "muttype_total")
  wide <- tidyr::separate(wide, muttype_total, c("muttype", "muttype_sub"), sep = "_(?=[0-9])")
  # Fix the factor levels to order of appearance so the facets keep the input order.
  wide <- dplyr::mutate(wide, muttype = factor(muttype, levels = unique(muttype)))

  # Reshape to long format: one row per context / sample combination.
  long <- tidyr::gather(wide, key = "sample", value = "count", -muttype, -muttype_sub)
  long <- dplyr::mutate(long, sample = factor(sample, levels = unique(sample)))

  # Total (rounded) number of indels per sample, shown in the row facet labels.
  totals <- dplyr::summarise(dplyr::group_by(long, sample), total = round(sum(count)))

  # Row facet labels: "<sample> (n = <total>)", looked up by sample name.
  facet_labs_y <- stringr::str_c(totals$sample, " (n = ", totals$total, ")")
  names(facet_labs_y) <- totals$sample

  # Column facet labels: four single base labels followed by
  # three groups that each run over 2, 3, 4 and 5+.
  facet_labs_x <- c("1: C", "1: T", "1: C", "1: T", rep(c(2, 3, 4, "5+"), times = 3))
  names(facet_labs_x) <- levels(long$muttype)

  # With a shared y axis only the x scales are left free.
  facet_scale <- if (same_y) "free_x" else "free"

  # One fill colour per indel type.
  colors <- c(
    "#FDBE6F", "#FF8001", "#B0DD8B", "#36A12E",
    "#FDCAB5", "#FC8A6A", "#F14432", "#BC141A",
    "#D0E1F2", "#94C4DF", "#4A98C9", "#1764AB",
    "#E2E2EF", "#B6B6D8", "#8683BD", "#61409B"
  )

  # The title and x-axis label stay empty unless extra labels are requested.
  title <- ""
  x_lab <- ""
  if (extra_labels) {
    title <- stringr::str_c(
      "Deletion ",
      "Insertion ",
      "Deletion ",
      "Insertion ",
      "Deletion (MH)"
    )
    x_lab <- stringr::str_c(
      "Homopolymer length ",
      "Number of repeat units ",
      "Microhomology length"
    )
  }

  # A condensed plot uses full width bars and no gap between the column facets.
  is_condensed <- condensed == TRUE
  width <- if (is_condensed) 1 else 0.6
  spacing <- if (is_condensed) 0 else 0.5

  # Assemble the figure: bars first, then facets, then colours and theme.
  fig <- ggplot(long, aes(x = muttype_sub, y = count, fill = muttype, width = width)) +
    geom_bar(stat = "identity")
  fig <- fig +
    facet_grid(sample ~ muttype,
      scales = facet_scale, space = "free_x",
      labeller = labeller(muttype = facet_labs_x, sample = facet_labs_y)
    )
  fig <- fig +
    scale_fill_manual(values = colors) +
    theme_bw() +
    labs(fill = "Mutation type", title = title, y = "Nr of indels", x = x_lab) +
    theme(
      panel.grid.major.x = element_blank(),
      panel.grid.minor.y = element_blank(),
      panel.spacing.x = unit(spacing, "lines")
    )

  fig
}
|