File: strand_occurrences.R

package info (click to toggle)
r-bioc-mutationalpatterns 3.16.0%2Bdfsg-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 5,360 kB
  • sloc: sh: 8; makefile: 2
file content (79 lines) | stat: -rw-r--r-- 2,592 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
#' Count occurrences per base substitution type and strand
#'
#' For each base substitution type and strand the total number
#' of mutations and the relative contribution within a group is returned.
#'
#' @param mut_mat_s 192 feature mutation count matrix, result from
#' 'mut_matrix_stranded()'
#' @param by Character vector with grouping info, optional
#'
#' @return A data.frame with the total number of mutations and relative
#' contribution within group per base substitution type and strand
#'
#' @importFrom magrittr %>%
#'
#' @examples
#' ## See the 'mut_matrix_stranded()' example for how we obtained the
#' ## following mutation matrix.
#' mut_mat_s <- readRDS(system.file("states/mut_mat_s_data.rds",
#'   package = "MutationalPatterns"
#' ))
#'
#' ## Load a reference genome.
#' ref_genome <- "BSgenome.Hsapiens.UCSC.hg19"
#' library(ref_genome, character.only = TRUE)
#'
#' tissue <- c(
#'   "colon", "colon", "colon",
#'   "intestine", "intestine", "intestine",
#'   "liver", "liver", "liver"
#' )
#'
#' strand_counts <- strand_occurrences(mut_mat_s, by = tissue)
#' @seealso
#' \code{\link{mut_matrix_stranded}},
#' \code{\link{plot_strand}},
#' \code{\link{plot_strand_bias}}
#'
#' @export

strand_occurrences <- function(mut_mat_s, by = NA) {

  # These variables use non standard evaluation.
  # To avoid R CMD check complaints we initialize them to NULL.
  type_strand <- no_mutations <- group <- NULL

  # Make data long
  tb_per_sample <- mut_mat_s %>%
    as.data.frame() %>%
    tibble::rownames_to_column("type_strand") %>%
    tidyr::pivot_longer(-type_strand, values_to = "no_mutations", names_to = "sample") %>%
    tidyr::separate(type_strand, into = c("type", "strand"), sep = "-") %>%
    dplyr::mutate(type = stringr::str_replace(type, ".*\\[(.*)\\].*", "\\1"))

  # If grouping variable not provided, set to "all"
  if (.is_na(by)) {
    by <- "all"
  }
  
  # Add grouping info
  tb_by <- tibble::tibble(
    "sample" = unique(tb_per_sample$sample),
    "by" = by
  )
  tb_per_sample <- tb_per_sample %>%
    dplyr::left_join(tb_by, by = "sample")

  # Summarise per group
  tb <- tb_per_sample %>%
    dplyr::mutate(by = factor(by, levels = unique(by))) %>% 
    dplyr::group_by(by, type, strand) %>% # Summarise the number of mutations per group
    dplyr::summarise(no_mutations = sum(no_mutations)) %>%
    dplyr::ungroup() %>%
    dplyr::group_by(by) %>% # Calculate relative per group. NOT per sample.
    dplyr::mutate(relative_contribution = no_mutations / sum(no_mutations)) %>%
    dplyr::ungroup() %>%
    dplyr::rename(group = by)

  return(tb)
}