1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132
|
#' Plot 96 trinucleotide profile per subgroup
#'
#' Plot relative contribution of 96 trinucleotides per subgroup.
#' This can be genomic regions but could also be other subsets.
#' The function uses a matrix generated by 'lengthen_mut_matrix()'
#' as its input.
#'
#' @param mut_matrix Mutation matrix
#' @param mode 'relative_sample', 'relative_sample_feature' or 'absolute'
#' When 'relative_sample', the number of variants will be shown
#' divided by the total number of variants in that sample.
#' When 'relative_sample_feature', the number of variants will be shown
#' divided by the total number of variants in that sample. and genomic region.
#' @param colors 6 value color vector
#' @param ymax Y axis maximum value, default = 0.2
#' @param condensed More condensed plotting format. Default = FALSE.
#' @return 96 trinucleotide profile plot per region
#'
#' @import ggplot2
#' @importFrom magrittr %>%
#'
#' @examples
#' ## See the 'lengthen_mut_matrix()' example for how we obtained the
#' ## mutation matrix information:
#' mut_mat_long <- readRDS(system.file("states/mut_mat_longregions.rds",
#' package = "MutationalPatterns"
#' ))
#'
#' ## Plot the 96-profile of three samples
#' plot_profile_region(mut_mat_long[, c(1, 4, 7)])
#' @seealso
#' \code{\link{mut_matrix}}
#' @family genomic_regions
#'
#' @export
#'
plot_profile_region <- function(mut_matrix, mode = c("relative_sample", "relative_sample_feature", "absolute"), colors = NULL, ymax = 0.2, condensed = FALSE) {
# These variables use non standard evaluation.
# To avoid R CMD check complaints we initialize them to NULL.
context <- feature <- substitution <- freq <- NULL
# Match argument
mode <- match.arg(mode)
# if colors parameter not provided, set to default colors.
if (is.null(colors)) {
colors <- COLORS6
}
if (length(colors) != 6) {
stop("Provide colors vector with length 6", call. = FALSE)
}
if (condensed) {
bar_width <- 1
} else {
bar_width <- 0.6
}
# Count number muts for labelling.
nr_muts <- colSums(mut_matrix)
facet_labs_y <- stringr::str_c(colnames(mut_matrix), " (n = ", nr_muts, ")")
names(facet_labs_y) <- colnames(mut_matrix)
# Split the rownames in context, substitution and features
row_names <- rownames(mut_matrix)
full_context <- stringr::str_remove(row_names, "_.*")
context <- stringr::str_replace(full_context, "\\[.*\\]", "\\.")
substitution <- full_context %>%
stringr::str_remove(".*\\[") %>%
stringr::str_remove("\\].*")
feature <- stringr::str_remove(row_names, ".*_")
feature <- factor(feature, levels = rev(unique(feature)))
# Combine everything in a tb
tb <- mut_matrix %>%
as.data.frame() %>%
tibble::as_tibble() %>%
dplyr::mutate(context = context, feature = feature, substitution = substitution) %>%
tidyr::gather(value = "freq", key = "sample", -context, -feature, -substitution)
if (mode == "relative_sample") {
tb <- tb %>%
dplyr::group_by(sample) %>%
dplyr::mutate(freq = freq / sum(freq)) %>%
dplyr::ungroup()
y_axis_break_interval <- 0.1
} else if (mode == "relative_sample_feature") {
tb <- tb %>%
dplyr::group_by(sample, feature) %>%
dplyr::mutate(freq = freq / sum(freq)) %>%
dplyr::ungroup()
y_axis_break_interval <- 0.1
} else if (mode == "absolute") {
y_axis_break_interval <- 10
}
tb <- dplyr::mutate(tb, freq = ifelse(is.nan(freq), 0, freq))
# Create figure.
# Suppresses alpha warning.
withCallingHandlers(
{
fig <- ggplot(data = tb, aes(x = context, y = freq, fill = substitution, alpha = feature)) +
geom_bar(stat = "identity", colour = "black", size = 0.2, width = bar_width) +
scale_fill_manual(values = colors) +
facet_grid(sample ~ substitution, labeller = labeller(sample = facet_labs_y)) +
ylab("Relative contribution") +
coord_cartesian(ylim = c(0, ymax)) +
scale_alpha_discrete(range = c(0.4, 1)) +
scale_y_continuous(breaks = seq(0, ymax, y_axis_break_interval)) +
guides(fill = "none") +
theme_bw() +
theme(
axis.title.y = element_text(size = 12, vjust = 1),
axis.text.y = element_text(size = 8),
axis.title.x = element_text(size = 12),
axis.text.x = element_text(size = 5, angle = 90, vjust = 0.5),
strip.text.x = element_text(size = 9),
strip.text.y = element_text(size = 9),
panel.grid.major.x = element_blank(),
panel.spacing.x = unit(0, "lines")
)
},
warning = function(w) {
if (grepl("Using alpha for a discrete variable is not advised.", conditionMessage(w))) {
invokeRestart("muffleWarning")
}
}
)
return(fig)
}
|