File: EnsDb-AnnotationDbi.Rd

package info (click to toggle)
r-bioc-ensembldb 2.14.0%2Bdfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 2,764 kB
  • sloc: perl: 331; sh: 15; makefile: 5
file content (233 lines) | stat: -rw-r--r-- 7,626 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
\name{select}
\Rdversion{1.1}
\alias{select}
\alias{select,EnsDb-method}
\alias{columns,EnsDb-method}
\alias{keys,EnsDb-method}
\alias{keytypes,EnsDb-method}
\alias{mapIds,EnsDb-method}

\title{Integration into the AnnotationDbi framework}
\description{
  Several of the methods available for \code{AnnotationDbi} objects are
  also implemented for \code{EnsDb} objects. This makes it possible to
  extract data from \code{EnsDb} objects in a similar fashion as from
  objects inheriting from the base annotation package class
  \code{AnnotationDbi}.
  In addition to the \emph{standard} usage, the \code{select} and
  \code{mapIds} methods for \code{EnsDb} objects also support the filter
  framework of the ensembldb package and thus allow performing more
  fine-grained queries to retrieve data.
}
\usage{

\S4method{columns}{EnsDb}(x)
\S4method{keys}{EnsDb}(x, keytype, filter,...)
\S4method{keytypes}{EnsDb}(x)
\S4method{mapIds}{EnsDb}(x, keys, column, keytype, ..., multiVals)
\S4method{select}{EnsDb}(x, keys, columns, keytype, ...)

}
\arguments{

  (In alphabetic order)

  \item{column}{
    For \code{mapIds}: the column to search on, i.e. from which values
    should be retrieved.
  }

  \item{columns}{
    For \code{select}: the columns from which values should be
    retrieved. Use the \code{columns} method to list all possible
    columns.
  }

  \item{keys}{
    The keys/ids for which data should be retrieved from the
    database. This can be either a character vector of keys/IDs, a
    single filter object extending
    \code{\link[AnnotationFilter]{AnnotationFilter}}, a combination of
    filters \code{\link[AnnotationFilter]{AnnotationFilterList}} or a
    \code{formula} representing a filter expression (see
    \code{\link[AnnotationFilter]{AnnotationFilter}} for more details).
  }

  \item{keytype}{
    For \code{mapIds} and \code{select}: the type (column) that matches
    the provided keys. This argument does not have to be specified if
    argument \code{keys} is a filter object extending
    \code{AnnotationFilter} or a \code{list} of such objects.

    For \code{keys}: which keys should be returned from the database.
  }

  \item{filter}{
    For \code{keys}: either a single object extending
    \code{AnnotationFilter} or a list of such objects to
    retrieve only specific keys from the database.
  }

  \item{multiVals}{
    What should \code{mapIds} do when there are multiple values that
    could be returned? Options are: \code{"first"} (default), \code{"list"},
    \code{"filter"}, \code{"asNA"}. See
    \code{mapIds} in the \code{AnnotationDbi} package  
    for a detailed description.
  }

  \item{x}{
    The \code{EnsDb} object.
  }

  \item{...}{
    Not used.
  }

}
\section{Methods and Functions}{
  \describe{

    \item{columns}{
      List all the columns that can be retrieved by the \code{mapIds}
      and \code{select} methods. Note that these column names are
      different from the ones supported by the \code{\link{genes}},
      \code{\link{transcripts}} etc. methods that can be listed by the
      \code{\link{listColumns}} method.

      Returns a character vector of supported column names.
    }

    \item{keys}{
      Retrieves all keys from the column name specified with
      \code{keytype}. By default (if \code{keytype} is not provided) it
      returns all gene IDs. Note that \code{keytype="TXNAME"} will
      return transcript ids, since no transcript names are available in
      the database.

      Returns a character vector of IDs.
    }

    \item{keytypes}{
      List all supported key types (column names).

      Returns a character vector of key types.
    }

    \item{mapIds}{
      Retrieve the mapped ids for a set of keys that are of a particular
      keytype. Argument \code{keys} can be either a character vector of
      keys/IDs, a single filter object extending
      \code{AnnotationFilter} or a list of such objects. For
      the latter, the argument \code{keytype} does not have to be
      specified. Importantly however, if the filtering system is used,
      the ordering of the results might not represent the ordering of
      the keys.

      The method usually returns a named character vector or, depending
      on the argument \code{multiVals}, a named list, with names
      corresponding to the keys (same ordering is only guaranteed if
      \code{keys} is a character vector).
    }

    \item{select}{
      Retrieve the data as a \code{data.frame} based on parameters for
      selected \code{keys}, \code{columns} and \code{keytype}
      arguments. Multiple matches of the keys are returned in one row
      for each possible match. Argument \code{keys} can be either a
      character vector of keys/IDs, a single filter object extending
      \code{AnnotationFilter} or a list of such objects. For
      the latter, the argument \code{keytype} does not have to be
      specified.

      Note that values from a column \code{"TXNAME"} will be the same
      as for a column \code{"TXID"}, since internally no database
      column \code{"tx_name"} is present and the column is thus mapped
      to \code{"tx_id"}.

      Returns a \code{data.frame} with the column names corresponding to
      the argument \code{columns} and rows with all data matching the
      criteria specified with \code{keys}.

      The use of \code{select} without filters or keys and without
      restricting to specific columns is strongly discouraged, as the
      SQL query to join all of the tables, especially if protein
      annotation data is available, is very expensive.
    }

  }
}

\value{
  See method description above.
}
\author{
  Johannes Rainer
}
\seealso{
  \code{\link{listColumns}}
  \code{\link{transcripts}}
}
\examples{

## Load an EnsDb annotation package and bind its database object to a
## shorter name that is used throughout the examples below.
library(EnsDb.Hsapiens.v86)
edb <- EnsDb.Hsapiens.v86

## List all supported keytypes.
keytypes(edb)

## List all supported columns for the select and mapIds methods.
columns(edb)

## List the /real/ database column names; these differ from the names
## returned by columns above.
listColumns(edb)

## Retrieve all keys corresponding to transcript ids.
txids <- keys(edb, keytype = "TXID")
length(txids)
head(txids)

## Retrieve all keys corresponding to gene names of genes encoded on
## chromosome X. The filter argument restricts which keys are returned.
gids <- keys(edb, keytype = "GENENAME", filter = SeqNameFilter("X"))
length(gids)
head(gids)

## Get a mapping of the genes BCL2 and BCL2L11 to all of their
## transcript ids and return the result as a list.
maps <- mapIds(edb, keys = c("BCL2", "BCL2L11"), column = "TXID",
               keytype = "GENENAME", multiVals = "list")
maps

## Perform the same query using a combination of a GeneNameFilter and a
## TxBiotypeFilter to just retrieve protein coding transcripts for these
## two genes. No keytype is needed because keys is a list of filter objects.
mapIds(edb, keys = list(GeneNameFilter(c("BCL2", "BCL2L11")),
                        TxBiotypeFilter("protein_coding")), column = "TXID",
       multiVals = "list")

## select:
## Retrieve transcript and gene related information for the above example,
## restricted to the listed columns.
select(edb, keys = list(GeneNameFilter(c("BCL2", "BCL2L11")),
                        TxBiotypeFilter("protein_coding")),
       columns = c("GENEID", "GENENAME", "TXID", "TXBIOTYPE", "TXSEQSTART",
                   "TXSEQEND", "SEQNAME", "SEQSTRAND"))

## Get all data for genes encoded on chromosome Y. No columns argument is
## given here, so the result is not restricted to specific columns.
Y <- select(edb, keys = "Y", keytype = "SEQNAME")
head(Y)
nrow(Y)

## Get selected columns for all lincRNAs encoded on chromosome Y. Here we use
## a filter expression, passed as a formula, to define what data to retrieve.
Y <- select(edb, keys = ~ seq_name == "Y" & gene_biotype == "lincRNA",
            columns = c("GENEID", "GENEBIOTYPE", "TXID", "GENENAME"))
head(Y)
nrow(Y)

}
\keyword{classes}