File: plot_lesion_segregation.R

package info (click to toggle)
r-bioc-mutationalpatterns 3.0.1%2Bdfsg-2
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 5,908 kB
  • sloc: sh: 8; makefile: 2
file content (172 lines) | stat: -rw-r--r-- 5,557 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
#' Plot the strands of variants to show lesion segregation
#'
#' The strands of variants in a GRanges object is plotted.
#' This way the presence of any lesion segregation is visualized.
#'
#' @param vcf GRanges object
#' @param per_chrom Boolean. Determines whether to create a separate plot per chromosome
#' @param sample_name Name of the sample
#'
#' @return ggplot2 object
#' @export
#' @seealso
#' \code{\link{calculate_lesion_segregation}}
#' @family Lesion_segregation
#' @examples
#'
#' ## See the 'read_vcfs_as_granges()' example for how we obtained the
#' ## following data:
#' grl <- readRDS(system.file("states/read_vcfs_as_granges_output.rds",
#'   package = "MutationalPatterns"
#' ))
#' ## Select a single GRanges object to plot.
#' gr <- grl[[1]]
#'
#' ## Plot lesion segregation
#' plot_lesion_segregation(gr, sample_name = "Colon1")
#'
#' ## Plot lesion segregation per chromosome
#' plot_lesion_segregation(gr, per_chrom = TRUE, sample_name = "Colon1")
plot_lesion_segregation <- function(vcf, per_chrom = FALSE, sample_name = NA) {

  # These variables use non standard evaluation.
  # To avoid R CMD check complaints we initialize them to NULL.
  max_pos <- start_mb <- notused <- NULL

  # Get strandedness
  vcf <- .get_strandedness_gr(vcf)
  
  #Genome is set to NULL to ensure seqlevels can be changed.
  GenomeInfoDb::genome(vcf) <- NA
  GenomeInfoDb::seqlevelsStyle(vcf) <- "NCBI" # This takes less space when plotting
  
  tb <- .get_strandedness_tb(vcf)

  # Ensures that the entire chromosomes are plotted, even when mutations don't span the entire chromosome.
  tb_limits <- GenomeInfoDb::seqlengths(vcf) %>%
    tibble::enframe(name = "seqnames", value = "max_pos") %>%
    dplyr::mutate(
      max_pos_mb = max_pos / 1000000,
      min_pos_mb = 1,
      seqnames = factor(seqnames, levels = levels(tb$seqnames))
    ) %>%
    dplyr::select(-max_pos) %>%
    tidyr::gather(key = "notused", value = "start_mb", -seqnames) %>%
    dplyr::mutate(
      start_mb = tidyr::replace_na(start_mb, 0), # NAs generated by unknown seqlevel lengths are replaced.
      y = ifelse(notused == "max_pos_mb", 1, 0)
    ) # Ensure there is always a 1 and a -1 in each chromosome.

  # Get x axis breaks
  if (nrow(tb)) {
    x_axis_breaks <- .lesion_get_x_axis_breaks(max(tb$start_mb), per_chrom = per_chrom)
  } else {
    x_axis_breaks <- 50
  }

  # Set point_sizes
  point_size <- 100 / length(vcf)
  if (per_chrom == TRUE) {
    point_size <- point_size * 5
  }
  if (point_size > 2) {
    point_size <- 2
  } else if (point_size < 0.02) {
    point_size <- 0.02
  }

  # Create plots
  if (per_chrom == FALSE) {
    fig <- .plot_lesion_segregation_gg(tb, tb_limits, x_axis_breaks, point_size, sample_name)
    return(fig)
  } else {
    tb_l <- split(tb, tb$seqnames)
    tb_limits_l <- split(tb_limits, tb_limits$seqnames)
    fig_l <- mapply(.plot_lesion_segregation_gg,
      tb_l, tb_limits_l,
      MoreArgs = list("x_axis_breaks" = x_axis_breaks, "point_size" = point_size, "sample_name" = sample_name),
      SIMPLIFY = FALSE
    )
    return(fig_l)
  }
}


#' Determine the x axis breaks of a lesion segregation plot
#'
#' The maximum x axis value is determined by the variant with the highest coordinate.
#' Additionally, there are more breaks when the plot is created per chromosome.
#'
#' @param max_coord Maximum coordinate value of a variant
#' @param per_chrom Boolean. Describing whether to create a separate plot per chromosome
#'
#' @return Numeric vector of x_axis_breaks
#' @noRd
#'
.lesion_get_x_axis_breaks <- function(max_coord, per_chrom) {

  # Set x-axis breaks
  if (per_chrom == TRUE) {
    x_axis_break_length <- 10
  } else {
    x_axis_break_length <- 50
  }

  if (max_coord < x_axis_break_length) {
    x_axis_breaks <- x_axis_break_length
  } else {
    x_axis_breaks <- seq(x_axis_break_length, max_coord, by = x_axis_break_length)
  }
  return(x_axis_breaks)
}


#' Plot the strands of variants to show lesion segregation
#'
#' This is a helper function for 'plot_lesion_segregation'.
#' It performs the actual plotting.
#'
#' @param tb A tibble with strand information of variants
#' @param tb_limits tibble describing the chromosome limits.
#' This ensures entire chromosomes are plotted,
#' instead of just the part with variants
#' @param x_axis_breaks x_axis_breaks
#' @param point_size Scalar describing the point size of the plot
#' @param sample_name Name of the sample
#'
#' @return ggplot2 object
#'
#' @import ggplot2
#' @noRd
#'
.plot_lesion_segregation_gg <- function(tb, tb_limits, x_axis_breaks, point_size, sample_name) {

  # These variables use non standard evaluation.
  # To avoid R CMD check complaints we initialize them to NULL.
  y <- start_mb <- NULL

  if (.is_na(sample_name)) {
    my_labs <- labs(y = "Strand", x = "Coordinate (mb)")
  } else {
    my_labs <- labs(y = "Strand", x = "Coordinate (mb)", title = sample_name)
  }


  # Plot strandedness
  fig <- ggplot(tb, aes(y = y, x = start_mb)) +
    geom_jitter(width = 0, height = 0.1, size = point_size) +
    facet_grid(. ~ seqnames, scales = "free_x", space = "free_x") +
    geom_blank(data = tb_limits) +
    scale_y_continuous(breaks = c(0, 1), labels = c("-", "+"), limits = c(-0.5, 1.5)) +
    scale_x_continuous(breaks = x_axis_breaks) +
    my_labs +
    theme_bw() +
    theme(
      text = element_text(size = 18),
      axis.text.x = element_text(size = 11, angle = 90, vjust = 0.5, hjust = 1),
      axis.text.y = element_text(size = 20),
      panel.grid.minor = element_blank()
    )

  return(fig)
}