File: mut_192_occurrences.R

package info (click to toggle)
r-bioc-mutationalpatterns 3.16.0%2Bdfsg-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 5,360 kB
  • sloc: sh: 8; makefile: 2
file content (68 lines) | stat: -rw-r--r-- 2,281 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
#' Count 192 trinucleotide mutation occurrences
#'
#'  @details
#'  This function is called by mut_matrix_stranded.
#'  The 192 trinucleotide context is the 96 trinucleotide context combined with the strands.
#'  This function calculates the 192 trinucleotide context for all variants.
#'  and then splits these per GRanges (samples). It then calculates how often each 192 trinucleotide context occurs.
#'
#' @param type_context result from type_context function
#' @param strand factor with strand information for each
#' position, for example "U" for untranscribed, "T" for transcribed strand,
#' and "-" for unknown
#' @param gr_sizes A vector indicating the number of variants per GRanges
#'
#' @importFrom magrittr %>%
#'
#' @return Mutation matrix with 192 mutation occurrences and 96 trinucleotides
#' for two strands

mut_192_occurrences <- function(type_context, strand, gr_sizes) {
  # get possible strand values
  values <- levels(strand)

  idx1 <- which(strand == values[1])
  idx2 <- which(strand == values[2])

  # get type context for both vcf subsets
  type_context_1 <- purrr::map(type_context, function(x) x[idx1])
  type_context_2 <- purrr::map(type_context, function(x) x[idx2])

  # Subset the gr_sizes.
  sample_vector <- rep(names(gr_sizes), gr_sizes) %>%
    factor(levels = names(gr_sizes))
  table_vector_1 <- sample_vector[idx1] %>%
    table()
  gr_sizes_1 <- as.vector(table_vector_1)
  names(gr_sizes_1) <- names(table_vector_1)
  table_vector_2 <- sample_vector[idx2] %>%
    table()
  gr_sizes_2 <- as.vector(table_vector_2)
  names(gr_sizes_2) <- names(table_vector_2)

  # make 96-trinucleotide count vector per set
  mut_mat_1 <- mut_96_occurrences(type_context_1, gr_sizes_1)
  mut_mat_2 <- mut_96_occurrences(type_context_2, gr_sizes_2)

  # add names
  names_1 <- paste(rownames(mut_mat_1), values[1], sep = "-")
  names_2 <- paste(rownames(mut_mat_2), values[2], sep = "-")

  # combine matrixes
  mut_mat <- rbind(mut_mat_1, mut_mat_2)
  rownames(mut_mat) <- c(names_1, names_2)

  # Reorder for backwards compatibility
  reorder_i <- purrr::map2(
    seq(1, nrow(mut_mat) / 2),
    seq(
      nrow(mut_mat) / 2 + 1,
      nrow(mut_mat)
    ),
    c
  ) %>%
    unlist()
  mut_mat <- mut_mat[reorder_i, , drop = FALSE]

  return(mut_mat)
}