File: plot_enrichment_depletion.R

package info (click to toggle)
r-bioc-mutationalpatterns 3.16.0%2Bdfsg-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 5,360 kB
  • sloc: sh: 8; makefile: 2
file content (155 lines) | stat: -rw-r--r-- 4,658 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
#' Plot enrichment/depletion of mutations in genomic regions
#'
#' @param df Dataframe result from enrichment_depletion_test()
#' @param sig_type The type of significance to be used. Possible values:
#'              * 'fdr' False discovery rate.
#'              A type of multiple testing correction.;
#'              * 'p' for regular p values.
#' @return Plot with two parts. 1: Barplot with no. mutations expected and
#' observed per region. 2: Effect size of enrichment/depletion
#' (log2ratio) with results significance test.
#'
#' @import ggplot2
#'
#' @examples
#' ## See the 'genomic_distribution()' example for how we obtained the
#' ## following data:
#' distr <- readRDS(system.file("states/distr_data.rds",
#'   package = "MutationalPatterns"
#' ))
#'
#' tissue <- c(
#'   "colon", "colon", "colon",
#'   "intestine", "intestine", "intestine",
#'   "liver", "liver", "liver"
#' )
#'
#' ## Perform the enrichment/depletion test.
#' distr_test <- enrichment_depletion_test(distr, by = tissue)
#'
#' ## Plot the enrichment/depletion
#' plot_enrichment_depletion(distr_test)
#' 
#' #Perform and plot the enrichmet depletion test for all samples pooled
#' distr_test2 <- enrichment_depletion_test(distr)
#' plot_enrichment_depletion(distr_test2)
#'
#' ## Plot with p values instead of fdr
#' plot_enrichment_depletion(distr_test, sig_type = "p")
#'
#' ## Use multiple (max 3) significance cutoffs.
#' ## This will vary the number of significance stars.
#' distr_multistars <- enrichment_depletion_test(distr,
#'   by = tissue,
#'   p_cutoffs = c(0.05, 0.01, 0.005),
#'   fdr_cutoffs = c(0.1, 0.05, 0.01)
#' )
#' plot_enrichment_depletion(distr_multistars)
#' @seealso
#' \code{\link{enrichment_depletion_test}},
#' \code{\link{genomic_distribution}}
#'
#' @export

plot_enrichment_depletion <- function(df, sig_type = c("fdr", "p")) {

  # These variables use non standard evaluation.
  # To avoid R CMD check complaints we initialize them to NULL.
  value <- variable <- observed <- expected <- significant <- NULL
  region <- ratio <- log2_ratio <- sig_plot <- NULL

  # Match argument
  sig_type <- match.arg(sig_type)

  # Create dataframe for absolute part of plot
  df_abs <- df %>%
    dplyr::select(by, region, observed, expected) %>%
    tidyr::pivot_longer(c(-by, -region), names_to = "variable", values_to = "value") %>%
    dplyr::mutate(variable = factor(variable, levels = unique(variable)))

  # Create dataframe for ratio part of plot
  df_ratio <- df %>%
    dplyr::mutate(
      ratio = (observed + 0.1) / (expected + 0.1),
      log2_ratio = log2(ratio)
    )

  if (sig_type == "p") {
    df_ratio$sig_plot <- df_ratio$significant
  } else {
    df_ratio$sig_plot <- df_ratio$significant_fdr
  }



  # Part 1: No. mutations expected and observed per region
  withCallingHandlers(
    {
      plot1 <- ggplot(df_abs, aes(
        x = by,
        y = value,
        fill = by,
        group = variable,
        alpha = variable
      )) +
        geom_bar(
          colour = "black",
          stat = "identity",
          position = position_dodge()
        ) +
        facet_grid(~region) +
        labs(x = "", y = "No. mutations") +
        scale_x_discrete(breaks = NULL) +
        scale_alpha_discrete(range = c(0.1, 1)) +
        theme_bw() +
        theme(
          axis.ticks = element_blank(),
          axis.text.x = element_blank(),
          legend.title = element_blank()
        )
    },
    warning = function(w) {
      if (grepl("Using alpha for a discrete variable is not advised.", conditionMessage(w))) {
        invokeRestart("muffleWarning")
      }
    }
  )

  # determine max y value for plotting
  # = log2 ratio with pseudo counts
  y_max <- round(max(abs(df_ratio$log2_ratio)), digits = 1) + 0.1

  # Part 2: effect size of enrichment/depletion with significance test
  plot2 <- ggplot(data = df_ratio, aes(
    x = by,
    y = log2_ratio,
    fill = by
  )) +
    geom_bar(
      colour = "black",
      stat = "identity",
      position = position_dodge()
    ) +
    scale_y_continuous(limits = c(-y_max, y_max)) +
    geom_text(
      aes(
        x = by,
        y = log2_ratio,
        label = sig_plot,
        vjust = ifelse(sign(log2_ratio) > 0, 0.5, 1)
      ),
      size = 8, position = position_dodge(width = 1)
    ) +
    facet_grid(~region) +
    labs(x = "", y = "log2(observed/expected)") +
    scale_x_discrete(breaks = NULL) +
    theme_bw() +
    theme(
      axis.ticks = element_blank(),
      axis.text.x = element_blank(),
      legend.title = element_blank()
    )

  output <- cowplot::plot_grid(plot1, plot2, ncol = 1, nrow = 2, rel_heights = c(2, 1.2))
  return(output)
}