File: plot_signature_strand_bias.R

package info (click to toggle)
r-bioc-mutationalpatterns 3.16.0%2Bdfsg-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 5,360 kB
  • sloc: sh: 8; makefile: 2
file content (112 lines) | stat: -rw-r--r-- 3,666 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
#' Plot signature strand bias
#'
#' Plot strand bias per mutation type for each signature.
#'
#' @param signatures_strand_bias Signature matrix with 192 features
#' @return Barplot
#'
#' @import ggplot2
#' @importFrom magrittr %>%
#'
#' @examples
#' ## See the 'mut_matrix()' example for how we obtained the following
#' ## mutation matrix.
#' mut_mat_s <- readRDS(system.file("states/mut_mat_s_data.rds",
#'   package = "MutationalPatterns"
#' ))
#'
#' ## Extracting signatures can be computationally intensive, so
#' ## we use pre-computed data generated with the following command:
#' # nmf_res_strand <- extract_signatures(mut_mat_s, rank = 2)
#'
#' nmf_res_strand <- readRDS(system.file("states/nmf_res_strand_data.rds",
#'   package = "MutationalPatterns"
#' ))
#'
#' ## Provide column names for the plot.
#' colnames(nmf_res_strand$signatures) <- c("Signature A", "Signature B")
#'
#' ## Creat figure
#' plot_signature_strand_bias(nmf_res_strand$signatures)
#'
#' ## You can also plot the bias of samples
#' plot_signature_strand_bias(mut_mat_s[, c(1, 2)])
#' @seealso
#' \code{link{extract_signatures}},
#' \code{link{mut_matrix}}
#'
#' @export

plot_signature_strand_bias <- function(signatures_strand_bias) {
  # These variables use non standard evaluation.
  # To avoid R CMD check complaints we initialize them to NULL.
  Signature <- type <- ratio <- transcribed <- untranscribed <- NULL
  `Group.1` <- `Group.2` <- significant <- pval <- log2_ratio <- NULL

  # check if there are 192 features in the signatures
  if (dim(signatures_strand_bias)[1] != 192) {
    stop(paste(
      "Input signature matrix does not have 192 features (96",
      "trinucleotide * 2 strands)."
    ))
  }

  # aggregate by strand and type
  sum_per_type <- stats::aggregate(signatures_strand_bias,
    by = list(STRAND, SUBSTITUTIONS_192),
    FUN = sum
  )

  # Make tibble longer
  stats_per_type <- sum_per_type %>%
    dplyr::rename(strand = `Group.1`, type = `Group.2`) %>%
    dplyr::mutate(strand = ifelse(strand == "T", "transcribed", "untranscribed")) %>%
    tidyr::pivot_longer(c(-strand, -type), names_to = "Signature") %>% # Combine signature columns
    tidyr::pivot_wider(names_from = strand) %>% # Split transcribed/untranscribed column for binom test.
    dplyr::mutate(
      observed = as.integer(transcribed),
      size = as.integer(transcribed + untranscribed),
      ratio = (transcribed / untranscribed),
      ratio = ifelse(is.nan(ratio), 1, ratio),
      log2_ratio = log2(ratio)
    )

  # Perform binomial test
  stats_per_type$pval <- purrr::map_dbl(seq_len(nrow(stats_per_type)), function(i) {
    row <- stats_per_type[i, ]
    binomial_test(0.5, row$size, row$observed)$pval
  })
  # Add stars when significant.
  stats_per_type <- dplyr::mutate(stats_per_type, significant = ifelse(pval < 0.05, "*", " "))

  # Find maximum y value for plotting
  ratios <- stats_per_type %>%
    dplyr::filter(is.finite(log2_ratio)) %>%
    dplyr::pull(log2_ratio)
  max <- round(max(abs(ratios)))

  # Plot
  plot <- ggplot(
    stats_per_type,
    aes(x = type, y = log2_ratio, fill = type)
  ) +
    geom_bar(stat = "identity", position = "dodge", color = "black") +
    scale_y_continuous(limits = c(-max, max)) +
    scale_fill_manual(values = COLORS6) +
    facet_grid(Signature ~ .) +
    ylab("log2(transcribed/untranscribed)") +
    theme_bw() +
    scale_x_discrete(breaks = NULL) +
    xlab("") +
    geom_text(
      aes(
        x = type,
        y = log2(ratio),
        label = significant,
        vjust = ifelse(sign(log2_ratio) > 0, 0.5, 1)
      ),
      size = 8, position = ggplot2::position_dodge(width = 1)
    )

  return(plot)
}