File: plot_profile_region.R

package info (click to toggle)
r-bioc-mutationalpatterns 3.16.0%2Bdfsg-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 5,360 kB
  • sloc: sh: 8; makefile: 2
file content (132 lines) | stat: -rw-r--r-- 4,753 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
#' Plot 96 trinucleotide profile per subgroup
#'
#' Plot relative contribution of 96 trinucleotides per subgroup.
#' This can be genomic regions but could also be other subsets.
#' The function uses a matrix generated by 'lengthen_mut_matrix()'
#' as its input.
#'
#' @param mut_matrix Mutation matrix
#' @param mode 'relative_sample', 'relative_sample_feature' or 'absolute'
#' When 'relative_sample', the number of variants will be shown
#' divided by the total number of variants in that sample.
#' When 'relative_sample_feature', the number of variants will be shown
#' divided by the total number of variants in that sample. and genomic region.
#' @param colors 6 value color vector
#' @param ymax Y axis maximum value, default = 0.2
#' @param condensed More condensed plotting format. Default = FALSE.
#' @return 96 trinucleotide profile plot per region
#'
#' @import ggplot2
#' @importFrom magrittr %>%
#'
#' @examples
#' ## See the 'lengthen_mut_matrix()' example for how we obtained the
#' ## mutation matrix information:
#' mut_mat_long <- readRDS(system.file("states/mut_mat_longregions.rds",
#'   package = "MutationalPatterns"
#' ))
#'
#' ## Plot the 96-profile of three samples
#' plot_profile_region(mut_mat_long[, c(1, 4, 7)])
#' @seealso
#' \code{\link{mut_matrix}}
#' @family genomic_regions
#'
#' @export
#'
plot_profile_region <- function(mut_matrix, mode = c("relative_sample", "relative_sample_feature", "absolute"), colors = NULL, ymax = 0.2, condensed = FALSE) {

  # These variables use non standard evaluation.
  # To avoid R CMD check complaints we initialize them to NULL.
  context <- feature <- substitution <- freq <- NULL

  # Match argument
  mode <- match.arg(mode)

  # if colors parameter not provided, set to default colors.
  if (is.null(colors)) {
    colors <- COLORS6
  }
  if (length(colors) != 6) {
    stop("Provide colors vector with length 6", call. = FALSE)
  }

  if (condensed) {
    bar_width <- 1
  } else {
    bar_width <- 0.6
  }

  # Count number muts for labelling.
  nr_muts <- colSums(mut_matrix)
  facet_labs_y <- stringr::str_c(colnames(mut_matrix), " (n = ", nr_muts, ")")
  names(facet_labs_y) <- colnames(mut_matrix)

  # Split the rownames in context, substitution and features
  row_names <- rownames(mut_matrix)
  full_context <- stringr::str_remove(row_names, "_.*")
  context <- stringr::str_replace(full_context, "\\[.*\\]", "\\.")
  substitution <- full_context %>%
    stringr::str_remove(".*\\[") %>%
    stringr::str_remove("\\].*")
  feature <- stringr::str_remove(row_names, ".*_")
  feature <- factor(feature, levels = rev(unique(feature)))

  # Combine everything in a tb
  tb <- mut_matrix %>%
    as.data.frame() %>%
    tibble::as_tibble() %>%
    dplyr::mutate(context = context, feature = feature, substitution = substitution) %>%
    tidyr::gather(value = "freq", key = "sample", -context, -feature, -substitution)

  if (mode == "relative_sample") {
    tb <- tb %>%
      dplyr::group_by(sample) %>%
      dplyr::mutate(freq = freq / sum(freq)) %>%
      dplyr::ungroup()
    y_axis_break_interval <- 0.1
  } else if (mode == "relative_sample_feature") {
    tb <- tb %>%
      dplyr::group_by(sample, feature) %>%
      dplyr::mutate(freq = freq / sum(freq)) %>%
      dplyr::ungroup()
    y_axis_break_interval <- 0.1
  } else if (mode == "absolute") {
    y_axis_break_interval <- 10
  }
  tb <- dplyr::mutate(tb, freq = ifelse(is.nan(freq), 0, freq))

  # Create figure.
  # Suppresses alpha warning.
  withCallingHandlers(
    {
      fig <- ggplot(data = tb, aes(x = context, y = freq, fill = substitution, alpha = feature)) +
        geom_bar(stat = "identity", colour = "black", size = 0.2, width = bar_width) +
        scale_fill_manual(values = colors) +
        facet_grid(sample ~ substitution, labeller = labeller(sample = facet_labs_y)) +
        ylab("Relative contribution") +
        coord_cartesian(ylim = c(0, ymax)) +
        scale_alpha_discrete(range = c(0.4, 1)) +
        scale_y_continuous(breaks = seq(0, ymax, y_axis_break_interval)) +
        guides(fill = "none") +
        theme_bw() +
        theme(
          axis.title.y = element_text(size = 12, vjust = 1),
          axis.text.y = element_text(size = 8),
          axis.title.x = element_text(size = 12),
          axis.text.x = element_text(size = 5, angle = 90, vjust = 0.5),
          strip.text.x = element_text(size = 9),
          strip.text.y = element_text(size = 9),
          panel.grid.major.x = element_blank(),
          panel.spacing.x = unit(0, "lines")
        )
    },
    warning = function(w) {
      if (grepl("Using alpha for a discrete variable is not advised.", conditionMessage(w))) {
        invokeRestart("muffleWarning")
      }
    }
  )

  return(fig)
}