File: AnnotationDb-class.Rd

package info (click to toggle)
r-bioc-annotationdbi 1.68.0-2
  • links: PTS, VCS
  • area: main
  • in suites: sid, trixie
  • size: 8,476 kB
  • sloc: sql: 10,876; makefile: 3; sh: 2
file content (257 lines) | stat: -rw-r--r-- 9,862 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
\name{AnnotationDb-objects}
\docType{class}

% Classes
\alias{AnnotationDb}
\alias{class:AnnotationDb}
\alias{AnnotationDb-class}
\alias{ChipDb-class}
\alias{GODb-class}
\alias{InparanoidDb-class}
\alias{OrgDb-class}
\alias{ReactomeDb-class}
\alias{OrthologyDb-class}

% Accessor-like methods
\alias{loadDb}
\alias{dbconn,AnnotationDb-method}
\alias{saveDb}
\alias{saveDb,AnnotationDb-method}
\alias{show,AnnotationDb-method}
\alias{metadata,AnnotationDb-method}
\alias{names,AnnotationDb-method}
\alias{species}
\alias{species,AnnotationDb-method}
\alias{dbfile,AnnotationDb-method}
\alias{taxonomyId}
\alias{taxonomyId,AnnotationDb-method}


% select and select related methods
\alias{cols}
\alias{columns}
\alias{columns,AnnotationDb-method}
\alias{columns,OrgDb-method}
\alias{columns,ChipDb-method}
\alias{columns,GODb-method}
\alias{columns,InparanoidDb-method}
\alias{columns,Inparanoid8Db-method}
\alias{columns,ReactomeDb-method}
\alias{columns,OrthologyDb-method}

\alias{keytypes}
\alias{keytypes,OrgDb-method}
\alias{keytypes,ChipDb-method}
\alias{keytypes,GODb-method}
\alias{keytypes,InparanoidDb-method}
\alias{keytypes,Inparanoid8Db-method}
\alias{keytypes,ReactomeDb-method}
\alias{keytypes,OrthologyDb-method}

% \alias{keys} %Not aliased here because it is already aliased in Bimap-keys.Rd
% \alias{keys,AnnotationDb-method}
\alias{keys}
\alias{keys,OrgDb-method}
\alias{keys,ChipDb-method}
\alias{keys,GODb-method}
\alias{keys,InparanoidDb-method}
\alias{keys,Inparanoid8Db-method}
\alias{keys,ReactomeDb-method}
\alias{keys,OrthologyDb-method}

\alias{select}
% \alias{select,AnnotationDb-method}
\alias{select,OrgDb-method}
\alias{select,ChipDb-method}
\alias{select,GODb-method}
\alias{select,InparanoidDb-method}
\alias{select,Inparanoid8Db-method}
\alias{select,ReactomeDb-method}
\alias{select,OrthologyDb-method}

% mapIds should work for anything with select defined...
\alias{mapIds}
\alias{mapIds,AnnotationDb-method}




\title{AnnotationDb objects and their progeny, methods etc.}

\description{
  \code{AnnotationDb} is the virtual base class for all annotation
  packages.  It contains a database connection and is meant to be the
  parent for a set of classes in the Bioconductor annotation
  packages. These classes will provide a means of dispatch for a
  widely available set of \code{select} methods and thus allow the
  easy extraction of data from the annotation packages.

  \code{select}, \code{columns} and \code{keys} are used together to
  extract data from an \code{AnnotationDb} object (or any object derived
  from the parent class).  Examples of classes derived from the
  \code{AnnotationDb} object include (but are not limited to):
  \code{ChipDb}, \code{OrgDb}, \code{GODb}, \code{OrthologyDb} and
  \code{ReactomeDb}.

  \code{columns} shows which kinds of data can be returned for the
  \code{AnnotationDb} object.
  
  \code{keytypes} allows the user to discover which keytypes can be
  passed in to \code{select} or \code{keys} and the \code{keytype}
  argument.
  
  \code{keys} returns keys for the database contained in the
  \code{AnnotationDb} object.  This method is already documented in the
  keys manual page but is mentioned again here because its usage with
  \code{select} is so intimate.  By default it will return the primary
  keys for the database, but if used with the \code{keytype} argument,
  it will return the keys from that keytype.
  
  \code{select} will retrieve the data as a data.frame based on
  parameters for selected \code{keys}, \code{columns} and \code{keytype}
  arguments.  Users should be warned that if you call \code{select} and request 
  columns that have multiple matches for your keys, select will return a 
  data.frame with one row for each possible match.  This has the effect that if 
  you request multiple columns and some of them have a many to one relationship 
  to the keys, things will continue to multiply accordingly.  So it's not a good 
  idea to request a large number of columns unless you know that what you are 
  asking for should have a one to one relationship with the initial set of keys.
  In general, if you need to retrieve a column (like GO) that has a many to one 
  relationship to the original keys, it is most useful to extract that 
  separately.

  \code{mapIds} gets the mapped ids (column) for a set of keys that are of a 
  particular keytype.  Usually returned as a named character vector.

  \code{saveDb} will take an AnnotationDb object and save the database
  to the file specified by the path passed in to the \code{file}
  argument.
  
  \code{loadDb} takes a .sqlite database file as an argument and uses
  data in the metadata table of that file to return an AnnotationDb
  style object of the appropriate type.

  \code{species} shows the genus and species label currently attached to
  the \code{AnnotationDb} object's database.

  \code{dbfile} gets the database file associated with an object.

  \code{dbconn} gets the database connection associated with an object.

  \code{taxonomyId} gets the taxonomy ID associated with an object (if available).

}

\usage{
  columns(x)
  keytypes(x)
  keys(x, keytype, ...)
  select(x, keys, columns, keytype, ...)
  mapIds(x, keys, column, keytype, ..., multiVals)
  saveDb(x, file)
  loadDb(file, packageName=NA)
}

\arguments{
  \item{x}{the \code{AnnotationDb} object. But in practice this will mean an 
    object derived from an \code{AnnotationDb} object such as an \code{OrgDb} or 
    \code{ChipDb} object.}
  \item{keys}{the keys to select records for from the database.  All possible 
    keys are returned by using the \code{keys} method.}
  \item{columns}{the columns or kinds of things that can be retrieved
    from the database.  As with \code{keys}, all possible columns are
    returned by using the \code{columns} method.}
  \item{keytype}{the keytype that matches the keys used.  For the
    \code{select} methods, this is used to indicate the kind of ID being used
    with the keys argument. For the \code{keys} method this is used to
    indicate which kind of keys are desired from \code{keys}.
  }
  \item{column}{the column to search on (for \code{mapIds}).  Different from 
    \code{columns} in that it can only have a single element for the value}
  \item{...}{other arguments.  These include:
    \describe{
      \item{pattern:}{the pattern to match (used by keys)}
      \item{column:}{the column to search on. This is used by keys and is
	for when the thing you want to pattern match is different from
	the keytype, or when you simply want to get keys that
	have a value for the thing specified by the column argument.}
      \item{fuzzy:}{TRUE or FALSE value.  Use fuzzy matching? (this is
	used with pattern by the keys method)}
    }
  }
  \item{multiVals}{What should \code{mapIds} do when there are multiple values         
  that could be returned?  Options include:
    \describe{
      \item{first:}{This value means that when there are multiple matches only the 1st thing that comes back will be returned. This is the default behavior}
      \item{list:}{This will just return a list object to the end user}
      \item{filter:}{This will remove all elements that contain multiple matches and will therefore return a shorter vector than what came in whenever some of the keys match more than one value}
      \item{asNA:}{This will return an NA value whenever there are multiple matches}
      \item{CharacterList:}{This just returns a SimpleCharacterList object}
      \item{FUN:}{You can also supply a function to the \code{multiVals} argument for custom behaviors.  The function must take a single argument and return a single value.  This function will be applied to all the elements and will serve as a 'rule' for which thing to keep when there is more than one element.  So for example this example function will always grab the last element in each result: \code{ last <- function(x){x[[length(x)]]} }
      }
    }
  }
  \item{file}{an \code{sqlite} file path.  A string that represents the
  full name you want for your sqlite database and also where to put it.}
  \item{packageName}{for internal use only}
}

\value{
  \code{keys}, \code{columns} and \code{keytypes} each return a character
    vector of possible values.  \code{select} returns a data.frame.
}

\author{Marc Carlson}

\seealso{
  \code{keys},
  \code{\link[DBI:dbConnect]{dbConnect}},
  \code{\link[DBI:dbListTables]{dbListTables}},
  \code{\link[DBI:dbListTables]{dbListFields}},
  \code{\link[DBI:dbSendQuery]{dbGetQuery}},
  \link{Bimap}
}

\examples{
require(hgu95av2.db)
## display the columns
columns(hgu95av2.db)
## get the 1st 6 possible keys
keys <- head( keys(hgu95av2.db) )
keys
## lookup gene symbol and gene type for the 1st 6 keys
select(hgu95av2.db, keys=keys, columns = c("SYMBOL","GENETYPE"))

## get keys based on RefSeq
keyref <- head( keys(hgu95av2.db, keytype="REFSEQ") )
keyref
## list supported key types
keytypes(hgu95av2.db)
## lookup gene symbol and refseq ID based on refseq IDs by setting
## the keytype to "REFSEQ" and passing in refseq keys:
select(hgu95av2.db, keys=keyref, columns = c("SYMBOL","REFSEQ"),
       keytype="REFSEQ")

keys <- head(keys(hgu95av2.db, 'ENTREZID'))
## get a default result (captures only the 1st element)
mapIds(hgu95av2.db, keys=keys, column='ALIAS', keytype='ENTREZID')
## or use a different option
mapIds(hgu95av2.db, keys=keys, column='ALIAS', keytype='ENTREZID', 
    multiVals="CharacterList")
## Or define your own function
last <- function(x){x[[length(x)]]}
mapIds(hgu95av2.db, keys=keys, column='ALIAS', keytype='ENTREZID', 
    multiVals=last)
    
## For other ways to access the DB, you can use dbconn() or dbfile() to extract
## the database connection or the database file associated with the object
dbconn(hgu95av2.db)
dbfile(hgu95av2.db)

## Try to retrieve an associated taxonomyId 
taxonomyId(hgu95av2.db)
}


\keyword{classes}
\keyword{methods}