1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152
|
## ----biocstyle, echo = FALSE, results = "asis", message = FALSE------------
library(BiocStyle)
BiocStyle::markdown()
## ----load-libs, message = FALSE--------------------------------------------
library(ensembldb)
library(EnsDb.Hsapiens.v86)
edb <- EnsDb.Hsapiens.v86
## Retrieve the genes
gns <- genes(edb, filter = ~ protein_domain_id == "PF00010" & seq_name == "21")
## ----gene-GRanges----------------------------------------------------------
gns
## ----olig-tx-models, message = FALSE---------------------------------------
## Change chromosome naming style to UCSC
seqlevelsStyle(edb) <- "UCSC"
## ----edb-subset, message = FALSE, echo = FALSE-----------------------------
## Subset the EnsDb to speed up vignette processing
edb <- filter(edb, filter = ~ seq_name %in% c("chr21", "chr16"))
## ----retrieve-olig-tx-models, message = FALSE------------------------------
## Retrieve the transcript models for OLIG1 and OLIG2 that encode the
## the protein domain
txs <- getGeneRegionTrackForGviz(
edb, filter = ~ genename %in% c("OLIG1", "OLIG2") &
protein_domain_id == "PF00010")
## ----olig-prot-doms, message = FALSE---------------------------------------
pdoms <- proteins(edb, filter = ~ tx_id %in% txs$transcript &
protein_domain_source == "pfam",
columns = c("protein_domain_id", "prot_dom_start",
"prot_dom_end"))
pdoms
## ----olig-proteinToGenome, message = FALSE---------------------------------
pdoms_rng <- IRanges(start = pdoms$prot_dom_start, end = pdoms$prot_dom_end,
names = pdoms$protein_id)
pdoms_gnm <- proteinToGenome(pdoms_rng, edb)
## ----olig-proteinToGenome-result, message = FALSE--------------------------
pdoms_gnm
## ----olig-proteinToGenome-reorganize, message = FALSE----------------------
## Convert the list to a GRanges with grouping information
pdoms_gnm_grng <- unlist(GRangesList(pdoms_gnm))
pdoms_gnm_grng$id <- rep(pdoms$protein_domain_id, lengths(pdoms_gnm))
pdoms_gnm_grng$grp <- rep(1:nrow(pdoms), lengths(pdoms_gnm))
pdoms_gnm_grng
## ----olig-plot, message = FALSE, warning = FALSE, fig.align = "center", fig.width = 8, fig.height = 2, fig.cap = "Transcripts of genes OLIG1 and OLIG2 encoding a helix-loop-helix protein domain. Shown are all transcripts of the genes OLIG1 and OLIG2 that encode a protein with a helix-loop-helix protein domain (PF00010). Genomic positions encoding protein domains defined in Pfam are shown in light blue.", fig.pos = "h!"----
library(Gviz)
## Define the individual tracks:
## - Ideagram
ideo_track <- IdeogramTrack(genome = "hg38", chromosome = "chr21")
## - Genome axis
gaxis_track <- GenomeAxisTrack()
## - Transcripts
gene_track <- GeneRegionTrack(txs, showId = TRUE, just.group = "right",
name = "", geneSymbol = TRUE, size = 0.5)
## - Protein domains
pdom_track <- AnnotationTrack(pdoms_gnm_grng, group = pdoms_gnm_grng$grp,
id = pdoms_gnm_grng$id, groupAnnotation = "id",
just.group = "right", shape = "box",
name = "Protein domains", size = 0.5)
## Generate the plot
plotTracks(list(ideo_track, gaxis_track, gene_track, pdom_track))
## ----sim2-fetch, message = FALSE-------------------------------------------
## Fetch all SIM2 transcripts encoding PF00010
txs <- getGeneRegionTrackForGviz(edb, filter = ~ genename == "SIM2" &
protein_domain_id == "PF00010")
## Fetch all Pfam protein domains within these transcripts
pdoms <- proteins(edb, filter = ~ tx_id %in% txs$transcript &
protein_domain_source == "pfam",
columns = c("protein_domain_id", "prot_dom_start",
"prot_dom_end"))
## ----sim2-plot, message = FALSE, warning = FALSE, fig.align = "center", fig.width = 8, fig.height = 2, fig.cap = "Transcripts of the gene SIM2 encoding a helix-loop-helix domain. Shown are all transcripts of SIM2 encoding a protein with a helix-loop-helix protein domain (PF00010). Genomic positions encoding protein domains defined in Pfam are shown in light blue.", echo = FALSE, fig.pos = "h!"----
pdoms_rng <- IRanges(start = pdoms$prot_dom_start, end = pdoms$prot_dom_end,
names = pdoms$protein_id)
pdoms_gnm <- proteinToGenome(pdoms_rng, edb)
## Convert the list to a GRanges with grouping information
pdoms_gnm_grng <- unlist(GRangesList(pdoms_gnm))
pdoms_gnm_grng$id <- rep(pdoms$protein_domain_id, lengths(pdoms_gnm))
pdoms_gnm_grng$grp <- rep(1:nrow(pdoms), lengths(pdoms_gnm))
gene_track <- GeneRegionTrack(txs, showId = TRUE, just.group = "right",
name = "", geneSymbol = TRUE, size = 0.5)
## - Protein domains
pdom_track <- AnnotationTrack(pdoms_gnm_grng, group = pdoms_gnm_grng$grp,
id = pdoms_gnm_grng$id, groupAnnotation = "id",
just.group = "right", shape = "box",
name = "Protein domains", size = 0.5)
## Generate the plot
plotTracks(list(ideo_track, gaxis_track, gene_track, pdom_track))
## ----ex2-map, message = FALSE, warning = FALSE-----------------------------
gnm_pos <- GRanges("chr16", IRanges(89920138, width = 1))
prt_pos <- genomeToProtein(gnm_pos, edb)
prt_pos
## ----ex2-select, message = FALSE-------------------------------------------
select(edb, keys = ~ protein_id == names(prt_pos[[1]]), columns = "SYMBOL")
## ----ex2-plot, message = FALSE, warning = FALSE, fig.cap = "Transcripts overlapping, or close to, the genomic position of interest. Shown are all transcripts The genomic position of the variant is highlighted in red.", fig.width = 8, fig.height = 4, fig.pos = "h!"----
## Get transcripts overlapping the genomic position.
txs <- getGeneRegionTrackForGviz(edb, filter = GRangesFilter(gnm_pos))
## Get all transcripts within the region from the start of the most 5'
## and end of the most 3' exon.
all_txs <- getGeneRegionTrackForGviz(
edb, filter = GRangesFilter(range(txs), type = "within"))
## Plot the data
## - Ideogram
ideo_track <- IdeogramTrack(genome = "hg38", chromosome = "chr16")
## - Genome axis
gaxis_track <- GenomeAxisTrack()
## - Transcripts
gene_track <- GeneRegionTrack(all_txs, showId = TRUE, just.group = "right",
name = "", geneSymbol = TRUE, size = 0.5)
## - highlight the region.
hl_track <- HighlightTrack(list(gaxis_track, gene_track), range = gnm_pos)
## Generate the plot
plotTracks(list(ideo_track, hl_track))
## ----ex2-res, message = FALSE----------------------------------------------
## Get the amino acid sequences for the 3 transcripts
prt_seq <- proteins(edb, return.type = "AAStringSet",
filter = ~ protein_id == names(prt_pos[[1]]))
## Extract the amino acid at position 294
subseq(prt_seq, start = 294, end = 294)
## ----sessionInfo-----------------------------------------------------------
sessionInfo()
|