File: plot_profile_region.R

package info (click to toggle)
r-bioc-mutationalpatterns 3.0.1%2Bdfsg-2
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 5,908 kB
  • sloc: sh: 8; makefile: 2
file content (130 lines) | stat: -rw-r--r-- 4,672 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
#' Plot 96 trinucleotide profile per subgroup
#'
#' Plot relative contribution of 96 trinucleotides per subgroup.
#' This can be genomic regions but could also be other subsets.
#' The function uses a matrix generated by 'lengthen_mut_matrix()'
#' as its input.
#'
#' @param mut_matrix Mutation matrix
#' @param mode 'relative_sample', 'relative_sample_feature' or 'absolute'
#' When 'relative_sample', the number of variants will be shown
#' divided by the total number of variants in that sample.
#' When 'relative_sample_feature', the number of variants will be shown
#' divided by the total number of variants in that sample. and genomic region.
#' @param colors 6 value color vector
#' @param ymax Y axis maximum value, default = 0.2
#' @param condensed More condensed plotting format. Default = FALSE.
#' @return 96 trinucleotide profile plot per region
#'
#' @import ggplot2
#' @importFrom magrittr %>%
#'
#' @examples
#' ## See the 'lengthen_mut_matrix()' example for how we obtained the
#' ## mutation matrix information:
#' mut_mat_long <- readRDS(system.file("states/mut_mat_longregions.rds",
#'   package = "MutationalPatterns"
#' ))
#'
#' ## Plot the 96-profile of three samples
#' plot_profile_region(mut_mat_long[, c(1, 4, 7)])
#' @seealso
#' \code{\link{mut_matrix}}
#' @family genomic_regions
#'
#' @export
#'
plot_profile_region <- function(mut_matrix, mode = c("relative_sample", "relative_sample_feature", "absolute"), colors = NULL, ymax = 0.2, condensed = FALSE) {

  # These variables use non standard evaluation.
  # To avoid R CMD check complaints we initialize them to NULL.
  context <- feature <- substitution <- freq <- NULL

  mode <- match.arg(mode)

  if (is.null(colors)) {
    colors <- COLORS6
  }
  if (length(colors) != 6) {
    stop("Provide colors vector with length 6", call. = FALSE)
  }

  if (condensed) {
    bar_width <- 1
  } else {
    bar_width <- 0.6
  }

  # Count number muts for labelling.
  nr_muts <- colSums(mut_matrix)
  facet_labs_y <- stringr::str_c(colnames(mut_matrix), " (n = ", nr_muts, ")")
  names(facet_labs_y) <- colnames(mut_matrix)

  # Split the rownames in context, substitution and features
  row_names <- rownames(mut_matrix)
  full_context <- stringr::str_remove(row_names, "_.*")
  context <- stringr::str_replace(full_context, "\\[.*\\]", "\\.")
  substitution <- full_context %>%
    stringr::str_remove(".*\\[") %>%
    stringr::str_remove("\\].*")
  feature <- stringr::str_remove(row_names, ".*_")
  feature <- factor(feature, levels = rev(unique(feature)))

  # Combine everything in a tb
  tb <- mut_matrix %>%
    as.data.frame() %>%
    tibble::as_tibble() %>%
    dplyr::mutate(context = context, feature = feature, substitution = substitution) %>%
    tidyr::gather(value = "freq", key = "sample", -context, -feature, -substitution)

  if (mode == "relative_sample") {
    tb <- tb %>%
      dplyr::group_by(sample) %>%
      dplyr::mutate(freq = freq / sum(freq)) %>%
      dplyr::ungroup()
    y_axis_break_interval <- 0.1
  } else if (mode == "relative_sample_feature") {
    tb <- tb %>%
      dplyr::group_by(sample, feature) %>%
      dplyr::mutate(freq = freq / sum(freq)) %>%
      dplyr::ungroup()
    y_axis_break_interval <- 0.1
  } else if (mode == "absolute") {
    y_axis_break_interval <- 10
  }
  tb <- dplyr::mutate(tb, freq = ifelse(is.nan(freq), 0, freq))

  # Create figure.
  # Suppresses alpha warning.
  withCallingHandlers(
    {
      fig <- ggplot(data = tb, aes(x = context, y = freq, fill = substitution, alpha = feature)) +
        geom_bar(stat = "identity", colour = "black", size = 0.2, width = bar_width) +
        scale_fill_manual(values = colors) +
        facet_grid(sample ~ substitution, labeller = labeller(sample = facet_labs_y)) +
        ylab("Relative contribution") +
        coord_cartesian(ylim = c(0, ymax)) +
        scale_alpha_discrete(range = c(0.4, 1)) +
        scale_y_continuous(breaks = seq(0, ymax, y_axis_break_interval)) +
        guides(fill = FALSE) +
        theme_bw() +
        theme(
          axis.title.y = element_text(size = 12, vjust = 1),
          axis.text.y = element_text(size = 8),
          axis.title.x = element_text(size = 12),
          axis.text.x = element_text(size = 5, angle = 90, vjust = 0.5),
          strip.text.x = element_text(size = 9),
          strip.text.y = element_text(size = 9),
          panel.grid.major.x = element_blank(),
          panel.spacing.x = unit(0, "lines")
        )
    },
    warning = function(w) {
      if (grepl("Using alpha for a discrete variable is not advised.", conditionMessage(w))) {
        invokeRestart("muffleWarning")
      }
    }
  )

  return(fig)
}