File: create_example_snvs.R

package info (click to toggle)
r-bioc-mutationalpatterns 3.16.0%2Bdfsg-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 5,360 kB
  • sloc: sh: 8; makefile: 2
file content (132 lines) | stat: -rw-r--r-- 4,467 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
library(tidyverse)
library(VariantAnnotation)
ref_genome <- "BSgenome.Hsapiens.UCSC.hg19"
library(ref_genome, character.only = TRUE)
library("TxDb.Hsapiens.UCSC.hg19.knownGene")

vcf_files <- list.files(system.file("extdata", package = "MutationalPatterns"),
  pattern = "sample.vcf", full.names = TRUE
)

sample_names <- c(
  "colon1", "colon2", "colon3",
  "intestine1", "intestine2", "intestine3",
  "liver1", "liver2", "liver3"
)

# Create grl
grl <- read_vcfs_as_granges(vcf_files, sample_names, ref_genome)
saveRDS(grl, "inst/states/read_vcfs_as_granges_output.rds")

# Create snvs
mut_mat <- mut_matrix(grl, ref_genome)
saveRDS(mut_mat, "inst/states/mut_mat_data.rds")

mut_mat_extended <- mut_matrix(grl, ref_genome, extension = 2)
saveRDS(mut_mat_extended, "inst/states/mut_mat_data_extended.rds")


# Create  transcription strand matrix
genes_hg19 <- genes(TxDb.Hsapiens.UCSC.hg19.knownGene)
mut_mat_s <- mut_matrix_stranded(grl, ref_genome, genes_hg19)
saveRDS(mut_mat_s, "inst/states/mut_mat_s_data.rds")

# Create replication direction
repli_file <- system.file("extdata/ReplicationDirectionRegions.bed",
  package = "MutationalPatterns"
)
repli_strand <- read.table(repli_file, header = TRUE)
repli_strand_granges <- GRanges(
  seqnames = repli_strand$Chr,
  ranges = IRanges(
    start = repli_strand$Start + 1,
    end = repli_strand$Stop
  ),
  strand_info = repli_strand$Class
)
seqlevelsStyle(repli_strand_granges) <- "UCSC"
saveRDS(repli_strand_granges, "inst/states/repli_strand.rds")

# Create replication strand matrix
mut_mat_repli <- mut_matrix_stranded(grl, ref_genome, repli_strand_granges, mode = "replication")
saveRDS(mut_mat_repli, "inst/states/mut_mat_repli.rds")

# Extract signatures
nmf_res <- extract_signatures(mut_mat, rank = 2)
saveRDS(nmf_res, "inst/states/nmf_res_data.rds")
nmf_res_strand <- extract_signatures(mut_mat_s, rank = 2)
saveRDS(nmf_res_strand, "inst/states/nmf_res_strand_data.rds")

# Get signatures
signatures <- get_known_signatures()

# Normal refit
fit_res <- fit_to_signatures(mut_mat, signatures)
saveRDS(fit_res, "inst/states/snv_refit.rds")

# Strict refit
strict_refit <- fit_to_signatures_strict(mut_mat, signatures, max_delta = 0.05)
saveRDS(strict_refit$fit_res, "inst/states/strict_snv_refit.rds")

strict_refit_best <- fit_to_signatures_strict(mut_mat, signatures[,1:5], max_delta = 0.004, method = "best_subset")
saveRDS(strict_refit_best$fit_res, "inst/states/strict_best_snv_refit.rds")


# bootstrapped refit
set.seed(42)
contri_boots <- fit_to_signatures_bootstrapped(mut_mat, signatures, n_boots = 2, max_delta = 0.05)
saveRDS(contri_boots, "inst/states/bootstrapped_snv_refit.rds")

# Calculate lesion segregation
lesion_segretation <- calculate_lesion_segregation(grl[1:2], sample_names[1:2])
saveRDS(lesion_segretation, "inst/states/lesion_segregation.rds")


# Split mutation types
# Read in genomic regions
CTCF_g <- readRDS(system.file("states/CTCF_g_data.rds",
  package = "MutationalPatterns"
))
promoter_g <- readRDS(system.file("states/promoter_g_data.rds",
  package = "MutationalPatterns"
))
flanking_g <- readRDS(system.file("states/promoter_flanking_g_data.rds",
  package = "MutationalPatterns"
))

# Combine the regions into a single GRangesList
regions <- GRangesList(promoter_g, flanking_g, CTCF_g)
names(regions) <- c("Promoter", "Promoter flanking", "CTCF")

seqlevelsStyle(regions) <- "UCSC"

# Read in some variants.
grl <- readRDS(system.file("states/read_vcfs_as_granges_output.rds",
  package = "MutationalPatterns"
))

grl_split <- split_muts_region(grl, regions)
saveRDS(grl_split, "inst/states/grl_split_region.rds")

mut_mat_split_region <- mut_matrix(grl_split, ref_genome)
saveRDS(mut_mat_split_region, "inst/states/mut_mat_splitregions.rds")

mut_mat_longregion <- lengthen_mut_matrix(mut_mat_split_region)
saveRDS(mut_mat_longregion, "inst/states/mut_mat_longregions.rds")



# Create context potential damage tibble
txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene

contexts = rownames(mut_mat)[1:6]
gene_ids = c(7157)
context_mismatches = context_potential_damage_analysis(contexts, txdb, ref_genome, gene_ids)
saveRDS(context_mismatches, "inst/states/context_mismatches.rds")


# Specifiy the chromosomes of interest.
chromosomes <- names(genome(grl)[1:3])
regional_sims = determine_regional_similarity(unlist(grl), ref_genome, chromosomes, window_size = 40, stepsize = 10, max_window_size_gen = 40000000)
saveRDS(regional_sims, "inst/states/regional_sims.rds")