File: count_indel_contexts.R

package info (click to toggle)
r-bioc-mutationalpatterns 3.16.0%2Bdfsg-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 5,360 kB
  • sloc: sh: 8; makefile: 2
file content (159 lines) | stat: -rw-r--r-- 5,400 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159

#' Count indel contexts
#'
#' @details
#' Counts the number of indels per COSMIC context from a GRanges or GRangesList object containing indel mutations.
#' This function applies the count_indel_contexts_gr function to each gr in its input.
#' It then combines the results in a single tibble and returns this.
#'
#' @param vcf_list GRanges or GRangesList object containing indel mutations in which the context was added with get_indel_context.
#'
#' @return A tibble containing the number of indels per COSMIC context per gr.
#'
#' @examples
#' ## Get a GRangesList or GRanges object with indel contexts.
#' ## See 'indel_get_context' for more info on how to do this.
#' grl_indel_context <- readRDS(system.file("states/blood_grl_indel_context.rds",
#'   package = "MutationalPatterns"
#' ))
#'
#' # Count the indel contexts
#' count_indel_contexts(grl_indel_context)
#' @family Indels
#'
#' @seealso \code{\link{get_indel_context}}
#'
#' @export
count_indel_contexts <- function(vcf_list) {

  # These variables use non standard evaluation.
  # To avoid R CMD check complaints we initialize them to NULL.
  muttype <- muttype_sub <- NULL

  categories <- INDEL_CATEGORIES

  # Turn grl into list if needed.
  if (inherits(vcf_list, "CompressedGRangesList")) {
    vcf_list <- as.list(vcf_list)
  }

  # Count contexts per sample
  if (inherits(vcf_list, "list")) {
    counts_l <- purrr::map(vcf_list, .count_indel_contexts_gr, categories)
    counts <- do.call(cbind, counts_l)
    colnames(counts) <- names(vcf_list)
  } else if (inherits(vcf_list, "GRanges")) {
    counts <- .count_indel_contexts_gr(vcf_list, categories)
    colnames(counts) <- "My_sample"
  } else {
    .not_gr_or_grl(vcf_list)
  }
  counts <- cbind(categories, counts)
  counts[is.na(counts)] <- 0
  counts <- counts %>%
    tidyr::unite("muttype_total", muttype, muttype_sub) %>%
    tibble::column_to_rownames("muttype_total") %>%
    as.matrix()

  # counts = dplyr::as_tibble(counts)
  # counts$muttype = factor(counts$muttype, levels = unique(counts$muttype))
  return(counts)
}

#' Count indel contexts from a single GRanges object.
#'
#' @details
#' Counts the number of indels per COSMIC context from a GRanges object containing indel mutations.
#' The function is called by count_indel_contexts
#'
#' @param gr GRanges object containing indel mutations in which the context was added with get_indel_context.
#' @param categories A tibble containing all possible indel context categories
#'
#' @return A single column tibble containing the number of indels per COSMIC context.
#'
#' @importFrom magrittr %>%
#' @noRd
#'
.count_indel_contexts_gr <- function(gr, categories) {
  # These variables use non standard evaluation.
  # To avoid R CMD check complaints we initialize them to NULL.
  muttype <- muttype_sub <- NULL

  # Check gr is not empty
  if (length(gr) == 0) {
    categories <- categories %>%
      dplyr::mutate(count = 0) %>%
      dplyr::select(-muttype, -muttype_sub)
    return(categories)
  }

  # Check context has previously been set.
  gr_colnames <- colnames(mcols(gr))
  if (!all(c("muttype", "muttype_sub") %in% gr_colnames)) {
    stop("The GRanges object does not contain the columns `muttype`` and `muttype_sub`.
             Did you forget to run `get_indel_context`?", call. = FALSE)
  }


  # Classify the number of repeat units/ homopolymer length / microhomology length
  # to either 5+ or 6+ depending on whether the indel is a ins or del.
  id_context <- dplyr::tibble("muttype" = gr$muttype, "muttype_sub" = gr$muttype_sub) %>%
    dplyr::mutate(
      muttype_sub = ifelse(muttype_sub >= 6, "6+", muttype_sub),
      muttype_sub = ifelse(grepl("insertion|microhomology", muttype) & muttype_sub >= 5,
        "5+", muttype_sub
      ),
      muttype_sub = as.character(muttype_sub)
    ) # Ensures column type for later joining

  # Classify large indels as size 5+
  id_context$muttype = .set_large_indels_as_5plus(id_context$muttype, gr)
  
  id_context_count <- id_context %>%
    dplyr::group_by(muttype, muttype_sub) %>%
    dplyr::summarise(count = dplyr::n())
  id_context_count_full <- dplyr::left_join(categories,
    id_context_count,
    by = c("muttype", "muttype_sub")
  ) %>%
    dplyr::select(-muttype, -muttype_sub)
  # colnames(id_context_count_full)[3] = name
  return(id_context_count_full)
}


#' Classify large indels as size 5+
#' Indels with a size that is more or equal to 5,
#' are set to size 5+
#'
#' @param muttype A vector containing the contexts of the mutations
#' @param gr GRanges object containing indel mutations in which the context was added with get_indel_context.
#'
#' @return A modified version of the vector containing the contexts of the mutations
#' 
#' @importFrom magrittr %>%
#' @noRd
.set_large_indels_as_5plus = function(muttype, gr){
  
  # Determine mutation sizes
  ref_sizes <- gr %>%
    .get_ref() %>%
    width()
  alt_sizes <- gr %>%
    .get_alt() %>%
    unlist() %>%
    width()
  mut_size <- abs(alt_sizes - ref_sizes)
  
  # Change classification of large mutations into 5+
  mut_size_f <- mut_size >= 5
  muttype <- ifelse(mut_size_f,
                               gsub("[0-9]+bp",
                                    "5+bp",
                                    muttype,
                                    perl = TRUE
                               ),
                               muttype
  )
  return(muttype)
}