File: strict_refit_backwards_selection_sample.R

package info (click to toggle)
r-bioc-mutationalpatterns 3.16.0%2Bdfsg-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 5,360 kB
  • sloc: sh: 8; makefile: 2
file content (170 lines) | stat: -rw-r--r-- 6,264 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
#' Function to perform the strict signature refitting for a single sample with backwards selection
#'
#' @param mut_mat_sample mutation count matrix for a single sample
#' @param my_signatures signature matrix
#' @param max_delta The maximum difference in original vs reconstructed cosine similarity between two iterations.
#'
#' @return A list containing a fit_res object, similar to 'fit_to_signatures' and a ggplot graph
#' that for each sample shows in what order the signatures were removed and how this affected the cosine similarity.

#' @noRd
#'
.strict_refit_backwards_selection_sample = function(mut_mat_sample, my_signatures, max_delta){
    
    # Determine the number of signatures
    nsigs <- ncol(my_signatures)
    
    # Fit again
    fit_res <- fit_to_signatures(mut_mat_sample, my_signatures)
    sim <- .get_cos_sim_ori_vs_rec(mut_mat_sample, fit_res)
    
    # Keep track of the cosine similarity and which signatures are removed.
    sims <- vector("list", nsigs)
    sims[[1]] <- sim
    removed_sigs <- vector("list", nsigs)
    removed_sigs[[1]] <- "None"
    
    if (nsigs > 1){ # Only remove signatures if there is more than 1.
    # Sequentially remove the signature with the lowest contribution
        for (j in seq(2, nsigs)) {
            
            # Remove signature with the weakest relative contribution
            contri_order <- fit_res$contribution %>%
                prop.table(2) %>%
                rowSums() %>%
                order()
            weakest_sig_index <- contri_order[1]
            weakest_sig <- colnames(my_signatures)[weakest_sig_index]
            removed_sigs[[j]] <- weakest_sig
            signatures_sel <- my_signatures[, -weakest_sig_index, drop = FALSE]
            
            
            # Fit with new signature selection
            fit_res <- fit_to_signatures(mut_mat_sample, signatures_sel)
            sim_new <- .get_cos_sim_ori_vs_rec(mut_mat_sample, fit_res)
            
            if (is.nan(sim_new) == TRUE) {
                sim_new <- 0
                warning("New similarity between the original and the reconstructed 
                              spectra after the removal of a signature was NaN. 
                              It has been converted into a 0. 
                              This happened with the following fit_res:")
                print(fit_res)
            }
            sims[[j]] <- sim_new
            
            # Check if the loss in cosine similarity between the original vs reconstructed after removing the signature is below the cutoff.
            delta <- sim - sim_new
            if (delta <= max_delta) {
                my_signatures <- signatures_sel
                sim <- sim_new
            }
            else {
                break
            }
        }
    }
    
    # Plot how the cosine similarities decayed
    sim_decay_fig <- .plot_sim_decay(sims, removed_sigs, max_delta, "backwards")
    
    # Perform final fit on selected signatures
    fit_res <- fit_to_signatures(mut_mat_sample, my_signatures)
    
    # Add data of sample to list.
    results <- list("sim_decay_fig" = sim_decay_fig, "fit_res" = fit_res)
    return(results)
}

#' Get the cosine similarity between a reconstructed mutation matrix and the original
#'
#' @param mut_matrix mutation count matrix (dimensions: x mutation types
#' X n samples)
#' @param fit_res Named list with signature contributions and reconstructed
#' mutation matrix
#'
#' @return Cosine similarity
#' @noRd
#'
.get_cos_sim_ori_vs_rec <- function(mut_matrix, fit_res) {
    cos_sim_all <- cos_sim_matrix(mut_matrix, fit_res$reconstructed)
    cos_sim <- diag(cos_sim_all)
    mean_cos_sim <- mean(cos_sim)
    return(mean_cos_sim)
}


#' Plot decay in cosine similarity as signatures are removed.
#'
#' @param sims List of cosine similarities
#' @param removed_sigs List of iteratively removed signatures
#' @param max_delta The maximum difference in original vs reconstructed cosine similarity.
#' @param method The signature selection method that was used. Possible values:
#'              * 'backwards';
#'              * 'best_subset';
#' 
#' @import ggplot2
#' @importFrom magrittr %>%
#' @noRd
#' @return ggplot object
#'
.plot_sim_decay <- function(sims, removed_sigs, max_delta, method = c("backwards", "best_subset")) {
    
    # These variables use non standard evaluation.
    # To avoid R CMD check complaints we initialize them to NULL.
    Removed_signatures <- Cosine_similarity <- NULL
    
    # Match argument
    method = match.arg(method)
    
    #Remove NULL values
    sims <- Filter(Negate(is.null), sims)
    removed_sigs <- Filter(Negate(is.null), removed_sigs)

    # Prepare data
    sims <- sims[!S4Vectors::isEmpty(sims)] %>%
        unlist()
    removed_sigs <- removed_sigs[!S4Vectors::isEmpty(removed_sigs)] %>%
        unlist()
    tb <- tibble::tibble(
        "Cosine_similarity" = sims,
        "Removed_signatures" = factor(removed_sigs, levels = removed_sigs)
    )
    
    # Determine if the final removed signature exceeded the cutoff.
    sims_l <- length(sims)
    col <- rep("low_delta", sims_l)
    if (sims_l > 1){ # Check if any signatures have been removed, before calculating the delta.
        final_delta <- sims[sims_l - 1] - sims[sims_l]
        if (final_delta > max_delta) {
            col[sims_l] <- "high_delta"
        }
    }
    # Set the x-axis label and theme
    if (method == "backwards"){
        xlab <- "Removed signatures"
        my_theme <- theme(
            axis.text.x = element_text(angle = 90, size = 10, hjust = 1, vjust = 0.5),
            text = element_text(size = 12)
        )
    } else{
        xlab <- "Nr. signatures used"
        my_theme <- theme(text = element_text(size = 12))
    }
    
    # Create plot
    fig <- ggplot(data = tb, aes(x = Removed_signatures, y = Cosine_similarity, fill = col)) +
        geom_bar(stat = "identity") +
        scale_fill_manual(
            limits = c("low_delta", "high_delta"),
            values = c("grey", "red"),
            guide = "none"
        ) +
        labs(
            x = xlab,
            y = paste0("Cosine similarity (max delta: ", max_delta, ")")
        ) +
        theme_classic() +
        my_theme
    return(fig)
}