File: plot_indel_contexts.R

package info (click to toggle)
r-bioc-mutationalpatterns 3.0.1%2Bdfsg-2
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 5,908 kB
  • sloc: sh: 8; makefile: 2
file content (130 lines) | stat: -rw-r--r-- 4,800 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
#' Plot the indel contexts
#'
#' @details
#' Plots the number of indels per COSMIC context per sample.
#' It takes a tibble with counts as its input. This tibble can be generated by
#' 'count_indel_contexts()'.
#' Each sample is plotted in a separate facet.
#' The same y axis can be used for all samples or a separate y axis can be used.
#' The facets at the top show the indel types. First the C and T deletions,
#' then the C and T insertions. Next are the multi base deletions and insertions.
#' Finally the deletions with microhomology (mh) are shown.
#' The x-axis at the bottom shows the number of repeat units.
#' For mh deletions the microhomology length is shown.
#'
#' The facet labels and fill colors are assigned to the indel types by
#' position, in the order in which the types first appear in the row names of
#' 'counts'. A warning is given when 'counts' contains fewer than the 16
#' expected indel types, because the labels may then not match the data.
#' An error is raised when it contains more than 16 indel types.
#'
#' @param counts A tibble containing the number of indels per COSMIC context.
#'     The row names hold the contexts and each column holds one sample.
#' @param same_y A boolean describing whether the same y axis should be used
#'     for all samples. Default = FALSE.
#' @param extra_labels A boolean describing whether extra labels should be added.
#'     These can clarify the plot, but will shift when different plot widths are used.
#'     We recommend saving a plot with a width of 12, when using this argument.
#'     Default = FALSE.
#' @param condensed More condensed plotting format. Default = FALSE.
#'
#' @return A ggplot figure.
#'
#' @examples
#' ## Get The indel counts
#' ## See 'count_indel_contexts()' for more info on how to do this.
#' indel_counts <- readRDS(system.file("states/blood_indel_counts.rds",
#'   package = "MutationalPatterns"
#' ))
#'
#' ## Plot contexts
#' plot_indel_contexts(indel_counts)
#'
#' ## Use the same y axis for all samples.
#' plot_indel_contexts(indel_counts, same_y = TRUE)
#'
#' ## Add extra labels to make plot clearer
#' plot_indel_contexts(indel_counts, extra_labels = TRUE)
#'
#' ## Create a more condensed plot
#' plot_indel_contexts(indel_counts, condensed = TRUE)
#' @import ggplot2
#' @importFrom magrittr %>%
#' @family Indels
#'
#' @seealso \code{\link{count_indel_contexts}}, \code{\link{plot_main_indel_contexts}}
#'
#' @export
plot_indel_contexts <- function(counts, same_y = FALSE, extra_labels = FALSE, condensed = FALSE) {
  # These variables use non standard evaluation.
  # To avoid R CMD check complaints we initialize them to NULL.
  count <- muttype <- muttype_sub <- muttype_total <- sample <- NULL

  # Move the row names into a column and split them into the main indel type
  # (muttype) and its sub context (muttype_sub). The split happens at an
  # underscore that is directly followed by a digit.
  # Then make the data long: one row per context per sample.
  # Both factors keep the order of first appearance, which determines the
  # order of the facets.
  counts <- counts %>%
    as.data.frame() %>%
    tibble::rownames_to_column("muttype_total") %>%
    tidyr::separate(
      muttype_total,
      c("muttype", "muttype_sub"),
      sep = "_(?=[0-9])"
    ) %>%
    dplyr::mutate(muttype = factor(muttype, levels = unique(muttype))) %>%
    tidyr::gather(key = "sample", value = "count", -muttype, -muttype_sub) %>%
    dplyr::mutate(sample = factor(sample, levels = unique(sample)))

  # Count nr mutations. (This is used for the facets)
  nr_muts <- counts %>%
    dplyr::group_by(sample) %>%
    dplyr::summarise(nr_muts = round(sum(count)))

  # Create facet texts
  facet_labs_y <- stringr::str_c(nr_muts$sample, " (n = ", nr_muts$nr_muts, ")")
  names(facet_labs_y) <- nr_muts$sample
  facet_labs_x <- c(
    "1: C", "1: T", "1: C", "1: T",
    "2", "3", "4", "5+",
    "2", "3", "4", "5+",
    "2", "3", "4", "5+"
  )

  # The labels above are matched to the indel types by position.
  # Make a mismatch in the number of types explicit, instead of failing with
  # a cryptic 'names' error (too many types) or silently pairing labels with
  # the wrong types (too few types).
  muttype_levels <- levels(counts$muttype)
  if (length(muttype_levels) > length(facet_labs_x)) {
    stop(
      "'counts' contains ", length(muttype_levels), " indel types, ",
      "but at most ", length(facet_labs_x), " are supported.",
      call. = FALSE
    )
  } else if (length(muttype_levels) < length(facet_labs_x)) {
    warning(
      "'counts' contains ", length(muttype_levels), " indel types, ",
      "but ", length(facet_labs_x), " were expected. ",
      "The facet labels are assigned by position and might not match ",
      "the indel types.",
      call. = FALSE
    )
  }
  names(facet_labs_x) <- muttype_levels

  # Set plotting parameters
  if (same_y) {
    facet_scale <- "free_x"
  } else {
    facet_scale <- "free"
  }

  # One fill color per indel type, in the same order as the facet labels.
  colors <- c(
    "#FDBE6F", "#FF8001", "#B0DD8B", "#36A12E", "#FDCAB5", "#FC8A6A",
    "#F14432", "#BC141A", "#D0E1F2", "#94C4DF", "#4A98C9", "#1764AB",
    "#E2E2EF", "#B6B6D8", "#8683BD", "#61409B"
  )

  # Add optional extra labels.
  # These are positioned with padding spaces, which is why they shift when
  # the plot width changes.
  if (extra_labels) {
    title <- stringr::str_c(
      "Deletion           ",
      "Insertion          ",
      "Deletion                                   ",
      "Insertion                                  ",
      "Deletion (MH)"
    )
    x_lab <- stringr::str_c(
      "Homopolymer length                            ",
      "Number of repeat units                                                                               ",
      "Microhomology length"
    )
  } else {
    title <- x_lab <- ""
  }

  # Change plotting parameters based on whether plot should be condensed.
  # A condensed plot has full width bars and no space between the facets.
  if (condensed) {
    width <- 1
    spacing <- 0
  } else {
    width <- 0.6
    spacing <- 0.5
  }

  # Create figure.
  # 'width' in aes() refers to the local bar width set above, not to a column
  # of 'counts'.
  fig <- ggplot(counts, aes(x = muttype_sub, y = count, fill = muttype, width = width)) +
    geom_bar(stat = "identity") +
    facet_grid(sample ~ muttype,
      scales = facet_scale, space = "free_x",
      labeller = labeller(muttype = facet_labs_x, sample = facet_labs_y)
    ) +
    scale_fill_manual(values = colors) +
    theme_bw() +
    labs(fill = "Mutation type", title = title, y = "Nr of indels", x = x_lab) +
    theme(
      panel.grid.major.x = element_blank(),
      panel.grid.minor.y = element_blank(),
      panel.spacing.x = unit(spacing, "lines")
    )

  return(fig)
}