File: plot_indel_contexts.R

package info (click to toggle)
r-bioc-mutationalpatterns 3.0.1%2Bdfsg-2
links: PTS, VCS
area: main
in suites: bullseye
size: 5,908 kB
sloc: sh: 8; makefile: 2
file content (130 lines) | stat: -rw-r--r-- 4,800 bytes
#' Plot the indel contexts
#'
#' @details
#' Plots the number of indels per COSMIC context per sample.
#' It takes a tibble with counts as its input. This tibble can be generated by
#' 'count_indel_contexts()'.
#' Each sample is plotted in a separate facet.
#' The same y axis can be used for all samples or a separate y axis can be used.
#' The facets at the top show the indel types. First the C and T deletions,
#' then the C and T insertions. Next are the multi base deletions and insertions.
#' Finally the deletions with microhomology (mh) are shown.
#' The x-axis at the bottom shows the number of repeat units.
#' For mh deletions the microhomology length is shown.
#'
#' @param counts A tibble containing the number of indels per COSMIC context.
#'     The context names are read from the row names and are split into an
#'     indel type and a sub-context at the underscore that precedes a digit.
#'     Every column is treated as one sample.
#'     At most 16 different indel types are supported.
#' @param same_y A boolean describing whether the same y axis should be used for all samples.
#'     Default = FALSE.
#' @param extra_labels A boolean describing whether extra labels should be added.
#'     These can clarify the plot, but will shift when different plot widths are used.
#'     We recommend saving a plot with a width of 12, when using this argument.
#'     Default = FALSE.
#' @param condensed More condensed plotting format. Default = FALSE.
#'
#' @return A ggplot figure.
#'
#' @examples
#' ## Get the indel counts
#' ## See 'count_indel_contexts()' for more info on how to do this.
#' indel_counts <- readRDS(system.file("states/blood_indel_counts.rds",
#'   package = "MutationalPatterns"
#' ))
#'
#' ## Plot contexts
#' plot_indel_contexts(indel_counts)
#'
#' ## Use the same y axis for all samples.
#' plot_indel_contexts(indel_counts, same_y = TRUE)
#'
#' ## Add extra labels to make plot clearer
#' plot_indel_contexts(indel_counts, extra_labels = TRUE)
#'
#' ## Create a more condensed plot
#' plot_indel_contexts(indel_counts, condensed = TRUE)
#' @import ggplot2
#' @importFrom magrittr %>%
#' @family Indels
#'
#' @seealso \code{\link{count_indel_contexts}}, \code{\link{plot_main_indel_contexts}}
#'
#' @export
plot_indel_contexts <- function(counts,
                                same_y = FALSE,
                                extra_labels = FALSE,
                                condensed = FALSE) {
  # These variables use non standard evaluation.
  # To avoid R CMD check complaints we initialize them to NULL.
  count <- muttype <- muttype_sub <- muttype_total <- sample <- NULL

  # Split the context names (row names) into the indel type and its
  # sub-context, then reshape to long format: one row per context and sample.
  # Both factors keep the order of first appearance, so the facets follow the
  # row and column order of the input instead of being sorted alphabetically.
  counts <- counts %>%
    as.data.frame() %>%
    tibble::rownames_to_column("muttype_total") %>%
    tidyr::separate(muttype_total, c("muttype", "muttype_sub"), sep = "_(?=[0-9])") %>%
    dplyr::mutate(muttype = factor(muttype, levels = unique(muttype))) %>%
    tidyr::pivot_longer(
      cols = -c(muttype, muttype_sub),
      names_to = "sample",
      values_to = "count"
    ) %>%
    dplyr::mutate(sample = factor(sample, levels = unique(sample)))

  # Count nr mutations. (This is used for the facets)
  nr_muts <- counts %>%
    dplyr::group_by(sample) %>%
    dplyr::summarise(nr_muts = round(sum(count)))

  # Create facet texts
  facet_labs_y <- stringr::str_c(nr_muts$sample, " (n = ", nr_muts$nr_muts, ")")
  names(facet_labs_y) <- nr_muts$sample

  # One label per indel type, in the order in which the types are expected to
  # appear in the input.
  facet_labs_x <- c(
    "1: C", "1: T", "1: C", "1: T",
    "2", "3", "4", "5+",
    "2", "3", "4", "5+",
    "2", "3", "4", "5+"
  )
  indel_types <- levels(counts$muttype)
  # The labels (and the colors below) are fixed in number. Fail with a clear
  # message instead of the cryptic error raised when assigning too many names.
  if (length(indel_types) > length(facet_labs_x)) {
    stop(
      "The counts contain ", length(indel_types), " different indel types, ",
      "but at most ", length(facet_labs_x), " can be plotted.",
      call. = FALSE
    )
  }
  names(facet_labs_x) <- indel_types

  # Set plotting parameters
  # The x scales are always free, because each indel type has its own
  # sub-contexts. The y scale is only shared between samples on request.
  if (same_y) {
    facet_scale <- "free_x"
  } else {
    facet_scale <- "free"
  }

  # One fill color per indel type.
  colors <- c(
    "#FDBE6F", "#FF8001", "#B0DD8B", "#36A12E", "#FDCAB5", "#FC8A6A",
    "#F14432", "#BC141A", "#D0E1F2", "#94C4DF", "#4A98C9", "#1764AB",
    "#E2E2EF", "#B6B6D8", "#8683BD", "#61409B"
  )

  # Add optional extra labels
  # The labels are positioned with literal spaces, which is why they shift
  # when the plot width changes.
  if (extra_labels) {
    title <- stringr::str_c(
      "Deletion           ",
      "Insertion          ",
      "Deletion                                   ",
      "Insertion                                  ",
      "Deletion (MH)"
    )
    x_lab <- stringr::str_c(
      "Homopolymer length                            ",
      "Number of repeat units                                                                               ",
      "Microhomology length"
    )
  } else {
    title <- x_lab <- ""
  }

  # Change plotting parameters based on whether plot should be condensed.
  # Condensed plots use full-width bars and no space between the facets.
  if (condensed) {
    width <- 1
    spacing <- 0
  } else {
    width <- 0.6
    spacing <- 0.5
  }

  # Create figure
  # 'width' is not a column of 'counts'; aes() picks up the local constant
  # defined above, so all bars get the same width.
  ggplot(counts, aes(x = muttype_sub, y = count, fill = muttype, width = width)) +
    geom_bar(stat = "identity") +
    facet_grid(sample ~ muttype,
      scales = facet_scale, space = "free_x",
      labeller = labeller(muttype = facet_labs_x, sample = facet_labs_y)
    ) +
    scale_fill_manual(values = colors) +
    theme_bw() +
    labs(fill = "Mutation type", title = title, y = "Nr of indels", x = x_lab) +
    theme(
      panel.grid.major.x = element_blank(),
      panel.grid.minor.y = element_blank(),
      panel.spacing.x = unit(spacing, "lines")
    )
}