File: count_dbs_contexts.R

package info (click to toggle)
r-bioc-mutationalpatterns 3.16.0%2Bdfsg-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 5,360 kB
  • sloc: sh: 8; makefile: 2
file content (99 lines) | stat: -rw-r--r-- 3,289 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
#' Count DBS contexts
#'
#' @details
#' Counts the number of DBS per COSMIC context from a GRanges or GRangesList
#' object containing DBS variants.
#' This function applies the count_dbs_contexts_gr function to each gr in its
#' input. It then combines the results in a single matrix and returns this.
#'
#' @param vcf_list GRanges or GRangesList object containing DBS mutations in
#'   which the context was added with get_dbs_context. A plain list of
#'   GRanges objects is handled the same way as a GRangesList.
#'
#' @return A matrix containing the number of DBS per COSMIC context per gr.
#'   The row names are the REF and ALT of each context joined by an
#'   underscore. There is one column per element of the input, named after
#'   the names of the list; a single GRanges gives one column called
#'   "My_sample". Contexts that were not observed are reported as 0.
#'
#' @examples
#' ## Get a GRangesList or GRanges object with DBS contexts.
#' ## See 'get_dbs_context' for more info on how to do this.
#' grl_dbs_context <- readRDS(system.file("states/blood_grl_dbs_context.rds",
#'   package = "MutationalPatterns"
#' ))
#'
#' # Count the DBS contexts
#' count_dbs_contexts(grl_dbs_context)
#' @family DBS
#' @seealso \code{\link{get_dbs_context}}
#'
#' @export
count_dbs_contexts <- function(vcf_list) {

  # These variables use non standard evaluation.
  # To avoid R CMD check complaints we initialize them to NULL.
  REF <- ALT <- NULL

  # Set possible ref and alt combis.
  categories <- DBS_CATEGORIES

  # Turn grl into list if needed.
  # Checking against the GRangesList parent class accepts both the compressed
  # and the uncompressed representation.
  if (inherits(vcf_list, "GRangesList")) {
    vcf_list <- as.list(vcf_list)
  }

  # Count contexts per sample
  if (inherits(vcf_list, "list")) {
    # One count column per sample, bound side by side in input order.
    counts_l <- purrr::map(vcf_list, .count_dbs_contexts_gr, categories)
    counts <- do.call(cbind, counts_l)
    colnames(counts) <- names(vcf_list)
  } else if (inherits(vcf_list, "GRanges")) {
    counts <- .count_dbs_contexts_gr(vcf_list, categories)
    colnames(counts) <- "My_sample"
  } else {
    # NOTE(review): presumably raises an error for unsupported input; `counts`
    # is not defined past this point otherwise.
    .not_gr_or_grl(vcf_list)
  }

  # Put the REF/ALT labels next to the counts. Categories that were absent
  # from a sample come back as NA from the per-sample count, so zero them.
  counts <- cbind(categories, counts)
  counts[is.na(counts)] <- 0

  # Collapse REF and ALT into a single "REF_ALT" row name and return a matrix.
  counts %>%
    tidyr::unite("muttype_total", REF, ALT) %>%
    tibble::column_to_rownames("muttype_total") %>%
    as.matrix()
}



#' Count DBS contexts from a single GRanges object.
#'
#' @details
#' Counts the number of DBS per COSMIC context from a GRanges object
#' containing DBS mutations.
#' The function is called by count_dbs_contexts
#'
#' @param gr GRanges object containing DBS mutations in which the context was
#'   added with 'get_dbs_context()'.
#' @param categories A tibble containing all possible DBS context categories.
#'   It needs a REF and an ALT column.
#'
#' @return A tibble with one row per row of `categories`, in the same order,
#'   with the REF and ALT columns removed. This is a single `count` column
#'   when `categories` holds only REF and ALT. Categories that do not occur
#'   in `gr` have a count of NA.
#'
#' @importFrom magrittr %>%
#'
#' @noRd
#'
.count_dbs_contexts_gr <- function(gr, categories) {

  # These variables use non standard evaluation.
  # To avoid R CMD check complaints we initialize them to NULL.
  REF <- ALT <- NULL

  # Pair up the reference and alternative allele of every variant.
  context <- cbind(
    "REF" = as.vector(.get_ref(gr)),
    "ALT" = as.vector(unlist(.get_alt(gr)))
  )

  # Tally how often each REF/ALT pair occurs. Ungroup explicitly so that the
  # grouping left behind by summarise() does not leak into later steps.
  counts <- context %>%
    tibble::as_tibble() %>%
    dplyr::group_by(REF, ALT) %>%
    dplyr::summarise(count = dplyr::n()) %>%
    dplyr::ungroup()

  # REF and ALT are validated independently of each other: a value only has
  # to occur somewhere in the matching column of `categories`.
  unknown_ref <- any(!counts$REF %in% categories$REF)
  unknown_alt <- any(!counts$ALT %in% categories$ALT)
  if (unknown_ref || unknown_alt) {
    stop(paste0("There are some REF or ALT bases, that do not belong to ", 
                "any of the categories. \n",
                "Did you forget to use 'get_dbs_context()'?"), call. = FALSE)
  }

  # Left join onto the full category table so that every category gets a row,
  # then drop the labels so only the counts remain.
  dplyr::left_join(categories, counts, by = c("REF", "ALT")) %>%
    dplyr::select(-REF, -ALT)
}