File: proteins.R

package info (click to toggle)
r-bioc-ensembldb 2.6.5%2Bdfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: buster
  • size: 2,720 kB
  • sloc: perl: 326; makefile: 5
file content (123 lines) | stat: -rw-r--r-- 4,089 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
## ----biocstyle, echo = FALSE, results = "asis", message = FALSE------------
## Attach the BiocStyle and ensembldb packages.
library(BiocStyle)
library(ensembldb)
## Presumably emits the BiocStyle markup for the rendered document (the chunk
## is declared with results = "asis") -- TODO confirm against BiocStyle docs.
BiocStyle::markdown() 

## ----doeval, echo = FALSE, results = "hide"--------------------------------
## Flags used as the `eval` option of the code chunks in this script.
## `haveProt` starts out FALSE and is recomputed further down from
## hasProteinData(edb) and `evalMe`.
haveProt <- FALSE
## Global switch: set `evalMe` to FALSE to turn off execution of the chunks
## that use it.
evalMe <- TRUE
## Disabled alternative: only evaluate on unix-like platforms.
## evalMe <- .Platform$OS.type == "unix"
 

## ----loadlib, message = FALSE, eval = evalMe-------------------------------
## Attach ensembldb and the EnsDb.Hsapiens.v86 package.
library(ensembldb)
library(EnsDb.Hsapiens.v86)
## Short alias for the database object; all later calls in this script
## operate on `edb`.
edb <- EnsDb.Hsapiens.v86
## Evaluate whether we have protein annotation available
hasProteinData(edb)
 

## ----restrict9, message = FALSE, echo = FALSE------------------------------
## Silently (the chunk uses echo = FALSE) restrict `edb` to entries with
## seq_name "11", i.e. chromosome 11. `edb` is overwritten, so all later
## queries in this script run against the filtered object.
edb <- filter(edb, filter = ~ seq_name == "11") 

## ----listCols, message = FALSE, eval = evalMe------------------------------
## Show what listTables() reports for `edb` -- presumably the database tables
## together with their columns (TODO confirm against the ensembldb docs).
listTables(edb)
 

## ----haveprot, echo = FALSE, results = "hide", eval = evalMe---------------
## Flag used as the `eval` option of the following chunks, to conditionally
## disable their evaluation: TRUE only if the database provides protein data
## and chunk evaluation is globally enabled.
## Both operands are expected to be single logical values, so the scalar `&&`
## is used rather than the element-wise `&`.
haveProt <- hasProteinData(edb) && evalMe
 

## ----a_transcripts, eval = haveProt----------------------------------------
## Retrieve the transcripts of gene ZBTB16, requesting the protein_id,
## uniprot_id and tx_biotype columns, then show the result.
txs <- transcripts(
  edb,
  filter = GeneNameFilter("ZBTB16"),
  columns = c("protein_id", "uniprot_id", "tx_biotype")
)
txs
 

## ----a_transcripts_coding_noncoding, eval = haveProt-----------------------
## Keep only the transcripts whose tx_biotype is not "protein_coding" and
## show their uniprot_id, tx_biotype and protein_id columns.
txs[
  txs$tx_biotype != "protein_coding",
  c("uniprot_id", "tx_biotype", "protein_id")
]
 

## ----a_transcripts_coding, eval = haveProt---------------------------------
## For the transcripts with tx_biotype "protein_coding", show the metadata
## columns tx_id, protein_id and uniprot_id.
mcols(
  txs[
    txs$tx_biotype == "protein_coding",
    c("tx_id", "protein_id", "uniprot_id")
  ]
)
 

## ----a_transcripts_coding_up, eval = haveProt------------------------------
## Show which Uniprot mapping types are present in the database.
listUniprotMappingTypes(edb)

## Query the ZBTB16 transcripts again, this time with the protein_id,
## uniprot_id and uniprot_db columns and keeping only protein_id to
## uniprot_id mappings of type "DIRECT". The result replaces `txs`.
txs <- transcripts(
  edb,
  filter = list(
    GeneNameFilter("ZBTB16"),
    UniprotMappingTypeFilter("DIRECT")
  ),
  columns = c("protein_id", "uniprot_id", "uniprot_db")
)
mcols(txs)
 

## ----a_genes_protdomid_filter, eval = haveProt-----------------------------
## Get all genes encoded on chromosome 11 whose protein contains the
## protein domain with ID "PS50097".
gns <- genes(
  edb,
  filter = ~ prot_dom_id == "PS50097" & seq_name == "11"
)
## Number of matching genes.
length(gns)

## Names of the matching genes, sorted.
sort(gns$gene_name)
 

## ----a_2_annotationdbi, message = FALSE, eval = haveProt-------------------
## Show all columns that are provided by the database -- presumably the
## values accepted by the `columns` argument of select() used below.
columns(edb)

## Show all key types/filters that are supported -- presumably the values
## accepted by the `keytype` argument of select() used below.
keytypes(edb)
 

## ----a_2_select, message = FALSE, eval = haveProt--------------------------
## Look up the UNIPROTID column for the key "ZBTB16", interpreting the key
## as a GENENAME.
select(
  edb,
  keys = "ZBTB16",
  keytype = "GENENAME",
  columns = "UNIPROTID"
)
 

## ----a_2_select_nmd, message = FALSE, eval = haveProt----------------------
## Call select() with a GeneNameFilter as key (no `keytype` is passed),
## requesting the TXBIOTYPE, UNIPROTID and PROTEINID columns.
select(
  edb,
  keys = GeneNameFilter("ZBTB16"),
  columns = c("TXBIOTYPE", "UNIPROTID", "PROTEINID")
)
 

## ----b_proteins, message = FALSE, eval = haveProt--------------------------
## Fetch the proteins of gene ZBTB16, asking for the result to be returned
## as an AAStringSet, then show it.
prts <- proteins(
  edb,
  filter = GeneNameFilter("ZBTB16"),
  return.type = "AAStringSet"
)
prts
 

## ----b_proteins_mcols, message = FALSE, eval = haveProt--------------------
## Show the metadata columns attached to the proteins in `prts`.
mcols(prts)
 

## ----b_proteins_prot_doms, message = FALSE, eval = haveProt----------------
## Fetch the ZBTB16 proteins as an AAStringSet, requesting tx_id plus all
## columns that listColumns() reports for the "protein_domain" table, so
## that protein domain annotations are returned along with the proteins.
pd <- proteins(
  edb,
  filter = GeneNameFilter("ZBTB16"),
  columns = c("tx_id", listColumns(edb, "protein_domain")),
  return.type = "AAStringSet"
)
pd
 

## ----b_proteins_prot_doms_2, message = FALSE, eval = haveProt--------------
## Count how often each name occurs in `pd`. According to the original note
## this is the number of protein domains per protein (presumably `pd` holds
## one entry per protein/domain combination -- TODO confirm).
table(names(pd))

## Show the metadata columns of `pd`.
mcols(pd)
 

## ----sessionInfo-----------------------------------------------------------
## Report the R version, platform and attached/loaded packages used for this
## run.
sessionInfo()