1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235
|
## ----biocstyle, echo = FALSE, results = "asis", message = FALSE------------
library(BiocStyle)
BiocStyle::markdown()
## ----load-libs, message = FALSE--------------------------------------------
library(ensembldb)
library(EnsDb.Hsapiens.v86)
edbx <- filter(EnsDb.Hsapiens.v86, filter = ~ seq_name == "X")
## ----genomeToTranscript-define---------------------------------------------
gnm <- GRanges("X:107716399-107716401")
## ----genomeToTranscript-ex1-plot, message = FALSE--------------------------
library(Gviz)
## Since we're using Ensembl chromosome names we have to set:
options(ucscChromosomeNames = FALSE)
## Define a genome axis track
gat <- GenomeAxisTrack(range = gnm)
## Get all genes in that region
gnm_gns <- getGeneRegionTrackForGviz(edbx, filter = GRangesFilter(gnm))
gtx <- GeneRegionTrack(gnm_gns, name = "tx", geneSymbol = TRUE,
showId = TRUE)
## Generate a higlight track
ht <- HighlightTrack(trackList = list(gat, gtx), range = gnm)
## plot the region
plotTracks(list(ht))
## ----genomeToTranscript-ex1-map, message = FALSE---------------------------
## Map genomic coordinates to within-transcript coordinates
gnm_tx <- genomeToTranscript(gnm, edbx)
## ----genomeToTranscript-ex1-object-----------------------------------------
gnm_tx
## ----genomeToTranscript-ex2, message = FALSE-------------------------------
gnm_1 <- gnm
strand(gnm_1) <- "-"
gnm_2 <- gnm
strand(gnm_2) <- "+"
gnm <- c(gnm_1, gnm_2)
genomeToTranscript(gnm, edbx)
## ----genomeToProtein-ex1, message = FALSE----------------------------------
gnm <- GRanges("X", IRanges(start = c(630898, 644636, 644633, 634829),
width = c(5, 1, 1, 3)))
gnm_prt <- genomeToProtein(gnm, edbx)
## ----genomeToProtein-ex1-res1----------------------------------------------
gnm_prt[[1]]
## ----genomeToProtein-ex1-res2----------------------------------------------
gnm_prt[[2]]
## ----genomeToProtein-ex1-res3----------------------------------------------
gnm_prt[[3]]
## ----genomeToProtein-ex1-res3-2, message = FALSE---------------------------
prt <- proteins(edbx, filter = ProteinIdFilter(names(gnm_prt[[3]])))
nchar(prt$protein_sequence)
## ----genomeToProtein-ex1-res4----------------------------------------------
gnm_prt[[4]]
## ----proteinToTranscript-ex1, message = FALSE------------------------------
GAGE10 <- proteins(edbx, filter = ~ genename == "GAGE10")
GAGE10
## Define the IRanges object.
GAGE10_prt <- IRanges(start = 5, end = 9, names = GAGE10$protein_id)
## ----proteinToTranscript-ex1-map, message = FALSE--------------------------
GAGE10_tx <- proteinToTranscript(GAGE10_prt, edbx)
## ----proteinToTranscript-ex1-res-------------------------------------------
GAGE10_tx
## ----proteinToTranscript-ex2, message = FALSE------------------------------
ids <- c("O15266", "Q9HBJ8", "donotexistant")
prt <- IRanges(start = c(13, 43, 100), end = c(21, 80, 100))
names(prt) <- ids
prt_tx <- proteinToTranscript(prt, edbx, idType = "uniprot_id")
## ----proteinToTranscript-ex2-res1------------------------------------------
prt_tx[[1]]
## ----proteinToTranscript-ex2-res2------------------------------------------
prt_tx[[2]]
## ----proteinToTranscript-ex2-res3------------------------------------------
prt_tx[[3]]
## ----proteinToGenome-gage10-define, message = FALSE------------------------
## Define the IRanges object.
GAGE10_prt <- IRanges(start = 5, end = 9, names = "ENSP00000385415")
## ----proteinToGenome-gage10-map, message = FALSE---------------------------
GAGE10_gnm <- proteinToGenome(GAGE10_prt, edbx)
## ----proteinToGenome-gage10-res--------------------------------------------
GAGE10_gnm
## ----proteinToGenome-uniprot-ids, message = FALSE--------------------------
## Define the IRanges providing Uniprot IDs.
uni_rng <- IRanges(start = c(2, 12, 8), end = c(2, 15, 17),
names = c("D6RDZ7", "O15266", "H7C2F2"))
## We have to specify that the IDs are Uniprot IDs
uni_gnm <- proteinToGenome(uni_rng, edbx, idType = "uniprot_id")
## ----proteinToGenome-uniprot-cds_ok----------------------------------------
uni_gnm[[3]]
## ----proteinToGenome-uniprot-counts----------------------------------------
## To how many Ensembl proteins was each Uniprot ID mapped?
lengths(uni_gnm)
## ----proteinToGenome-uniprot-multi-----------------------------------------
uni_gnm[["O15266"]]
## ----proteinToGenome-SYP-fetch-domains, message = FALSE--------------------
SYP <- proteins(edbx, filter = ~ genename == "SYP",
columns = c("protein_id", "tx_id",
listColumns(edbx, "protein_domain")),
return.type = "AAStringSet")
SYP
## ----proteinToGenome-SYP-single-protein, message = FALSE-------------------
## How many proteins are annotated to SYP?
unique(mcols(SYP)$protein_id)
## Reduce the result to a single protein
SYP <- SYP[names(SYP) == "ENSP00000263233"]
## List the available protein domains and additional annotations
mcols(SYP)
## ----proteinToGenome-SYP-map, message = FALSE------------------------------
SYP_rng <- IRanges(start = mcols(SYP)$prot_dom_start,
end = mcols(SYP)$prot_dom_end)
mcols(SYP_rng) <- mcols(SYP)
## Map the domains to the genome. We set "id" to the name
## of the metadata columns containing the protein IDs
SYP_gnm <- proteinToGenome(SYP_rng, edbx, id = "protein_id")
## ----proteinToGenome-SYP-second--------------------------------------------
SYP_gnm[[2]]
## ----proteinToGenome-SYP-plot, message = FALSE-----------------------------
library(Gviz)
## Define a genome axis track
gat <- GenomeAxisTrack()
## Get the transcript ID:
txid <- SYP_gnm[[1]]$tx_id[1]
## Get a GRanges for the transcript
trt <- getGeneRegionTrackForGviz(edbx, filter = TxIdFilter(txid))
## Define a GRanges for the mapped protein domains and add
## metadata columns with the grouping of the ranges and the
## IDs of the corresponding protein domains, so they can be
## identified in the plot
dmns <- unlist(GRangesList(SYP_gnm))
dmns$grp <- rep(1:length(SYP_rng), lengths(SYP_gnm))
dmns$id <- rep(mcols(SYP_rng)$protein_domain_id, lengths(SYP_gnm))
## Since we're using Ensembl chromosome names we have to set
options(ucscChromosomeNames = FALSE)
## Plotting the transcript and the mapped protein domains.
plotTracks(list(gat,
GeneRegionTrack(trt, name = "tx"),
AnnotationTrack(dmns, group = dmns$grp,
id = dmns$id,
groupAnnotation = "id",
just.group = "above",
shape = "box",
name = "Protein domains")),
transcriptAnnotation = "transcript")
## ----transcriptToGenome-map, message = FALSE-------------------------------
rng_tx <- IRanges(start = c(501, 1), width = c(5, 5),
names = c("ENST00000486554", "ENST00000381578"))
rng_gnm <- transcriptToGenome(rng_tx, edbx)
## ----transcriptToGenome-res-1----------------------------------------------
rng_gnm
## ----pkp2-cdsToTranscript--------------------------------------------------
## Define the position within the CDS of the transcript
pkp2_cds <- IRanges(start = c(1643, 1881), width = c(1, 1),
name = rep("ENST00000070846", 2))
## Convert cds-relative to transcript-relative coordinates
pkp2 <- cdsToTranscript(pkp2_cds, EnsDb.Hsapiens.v86)
pkp2
## ----pkp2-transcriptToGenome-----------------------------------------------
pkp2_gnm <- transcriptToGenome(pkp2, EnsDb.Hsapiens.v86)
pkp2_gnm
## ----pkp2-variant-pos-validate---------------------------------------------
library(BSgenome.Hsapiens.NCBI.GRCh38)
getSeq(BSgenome.Hsapiens.NCBI.GRCh38, pkp2_gnm)
## ----transcriptToPrptein-map, message = FALSE------------------------------
rng_tx <- IRanges(start = c(501, 1, 200), width = c(5, 5, 4),
names = c("ENST00000486554", "ENST00000381578",
"ENST00000431238"))
rng_prt <- transcriptToProtein(rng_tx, edbx)
## ----transcriptToProtein-res-----------------------------------------------
rng_prt
## ----sessionInfo-----------------------------------------------------------
sessionInfo()
|