1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195
|
#' Plot a mutation matrix as a heatmap
#'
#' Function to plot a SNV mutation matrix as a heatmap.
#' This is especially useful when looking at a wide mutational context.
#'
#'
#' @param mut_matrix Matrix containing mutation counts.
#' @param by Optional grouping variable
#' @param max Maximum value used for plotting the relative contributions.
#' Contributions that are higher will have the maximum colour. (Default: 0.02)
#' @param condensed More condensed plotting format. Default = F.
#'
#' @return A ggplot object
#' @export
#' @importFrom magrittr %>%
#' @import ggplot2
#'
#' @seealso
#' \code{\link{mut_matrix}},
#' \code{\link{plot_96_profile}},
#' \code{\link{plot_river}}
#' @examples
#'
#' ## See the 'mut_matrix()' examples for how we obtained the
#' ## mutation matrix information:
#' ## Get regular matrix
#' mut_mat <- readRDS(system.file("states/mut_mat_data.rds",
#' package = "MutationalPatterns"
#' ))
#'
#' ## Create heatmap of profile
#' plot_profile_heatmap(mut_mat, max = 0.1)
#'
#' ## Get extended matrix
#' mut_mat_extended <- readRDS(system.file("states/mut_mat_data_extended.rds",
#' package = "MutationalPatterns"
#' ))
#'
#' ## Create heatmap of extended profile
#' plot_profile_heatmap(mut_mat_extended)
#'
#' ## Or plot heatmap per tissue
#' tissue <- c(
#' "colon", "colon", "colon",
#' "intestine", "intestine", "intestine",
#' "liver", "liver", "liver"
#' )
#'
#' plot_profile_heatmap(mut_mat_extended, by = tissue)
#'
#' ## Or plot the heatmap per sample.
#' plot_profile_heatmap(mut_mat_extended,
#' by = colnames(mut_mat_extended),
#' max = 0.05
#' )
#'
#'
#' ## Create a condensed heatmap of extended profile
#' plot_profile_heatmap(mut_mat_extended, condensed = TRUE)
plot_profile_heatmap <- function(mut_matrix,
by = NA,
max = 0.02,
condensed = FALSE) {
# These variables use non standard evaluation.
# To avoid R CMD check complaints we initialize them to NULL.
fullcontext <- l_context <- r_context <- muttype <- NULL
nrmuts <- rel_nrmuts <- NULL
# check arguments
if (!inherits(mut_matrix, "matrix")) {
stop("mut_matrix must be a matrix", call. = FALSE)
}
# matrix should have row and colnames
if (length(colnames(mut_matrix)) == 0) {
stop("mut_matrix is missing colnames", call. = FALSE)
}
if (length(rownames(mut_matrix)) == 0) {
stop("mut_matrix is missing rownames", call. = FALSE)
}
# Transform the data into long format and get left/right context seperately.
tb <- mut_matrix %>%
as.data.frame() %>%
tibble::rownames_to_column("fullcontext") %>%
tidyr::pivot_longer(-fullcontext, names_to = "sample", values_to = "nrmuts") %>%
tidyr::separate("fullcontext", # Separate context into left context, mut and right context
into = c("l_context", "muttype", "r_context"),
sep = "\\[|\\]"
) %>%
dplyr::mutate(
mut = factor(muttype, levels = unique(muttype)),
r_context = factor(r_context, levels = unique(r_context)),
l_context = factor(l_context, levels = Biostrings::reverse(unique(l_context)))
)
# Make data relative
tb <- tb %>%
dplyr::group_by(sample) %>%
dplyr::mutate(rel_nrmuts = nrmuts / sum(nrmuts)) %>%
dplyr::ungroup()
# Add sample grouping
if (.is_na(by)) {
by <- "all"
}
tb_by <- tibble::tibble(
"sample" = unique(tb$sample),
"by" = by
)
tb <- tb %>%
dplyr::left_join(tb_by, by = "sample")
# Combine samples based on grouping
tb <- tb %>%
dplyr::mutate(by = factor(by, levels = unique(by))) %>%
dplyr::group_by(by, l_context, muttype, r_context) %>%
dplyr::summarise(
nrmuts = sum(nrmuts),
rel_nrmuts = mean(rel_nrmuts),
total_indv = dplyr::n(),
.groups = "drop_last"
) %>%
dplyr::ungroup()
# If value is higher than y_max, change it to ymax. (Prevents plotting issues)
tb <- tb %>%
dplyr::mutate(rel_nrmuts = ifelse(rel_nrmuts > max, max, rel_nrmuts))
# Count number muts per sample_group
tot_muts_samplegroups_tb <- tb %>%
dplyr::group_by(by) %>%
dplyr::summarise(nrmuts = sum(nrmuts), .groups = "drop_last")
# Create y facet labels
facet_labs_y <- stringr::str_c(tot_muts_samplegroups_tb$by, " (n = ", tot_muts_samplegroups_tb$nrmuts, ")")
names(facet_labs_y) <- tot_muts_samplegroups_tb$by
# Count number muts per mut
tot_muts_muttype_tb <- tb %>%
dplyr::group_by(muttype) %>%
dplyr::summarise(nrmuts = sum(nrmuts), .groups = "drop_last")
# Create x facet labels
facet_labs_x <- stringr::str_c(tot_muts_muttype_tb$muttype, " (n = ", tot_muts_muttype_tb$nrmuts, ")")
names(facet_labs_x) <- tot_muts_muttype_tb$muttype
# Set plotting parameters
context_size <- stringr::str_length(tb$l_context[1])
if (context_size == 1) {
axis_size <- 10
} else if (context_size == 2) {
axis_size <- 8
} else if (context_size == 3) {
axis_size <- 4
} else {
axis_size <- 3
}
# Change plotting parameters based on whether plot should be condensed.
if (condensed == TRUE) {
spacing <- 0
} else {
spacing <- 0.5
}
# Create plot
fig <- ggplot(tb, aes(x = r_context,
y = l_context,
fill = rel_nrmuts)) +
geom_raster() +
scale_fill_distiller(
palette = "YlGnBu",
direction = 1,
name = "Relative contribution",
limits = c(0, max)
) +
facet_grid(by ~ muttype, labeller = labeller(by = facet_labs_y, muttype = facet_labs_x)) +
labs(x = "3' context", y = "5' context") +
theme_bw() +
theme(
axis.text.y = element_text(size = axis_size),
axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5, size = axis_size),
panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
panel.spacing.x = unit(spacing, "lines"),
panel.spacing.y = unit(spacing, "lines")
)
return(fig)
}
|