File: AnnotationHub-class.Rd

package info (click to toggle)
r-bioc-annotationhub 3.14.0%2Bdfsg-2
  • links: PTS, VCS
  • area: main
  • in suites: sid, trixie
  • size: 592 kB
  • sloc: makefile: 2
file content (408 lines) | stat: -rw-r--r-- 13,603 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
\name{AnnotationHub-objects}
\docType{class}

% Classes
\alias{class:AnnotationHub}
\alias{AnnotationHub-class}
\alias{class:Hub}
\alias{Hub-class}

% Constructor
\alias{.Hub}
\alias{AnnotationHub}
\alias{refreshHub}

% Accessor-like methods
\alias{mcols,Hub-method}

\alias{cache}
\alias{cache,Hub-method}
\alias{cache,AnnotationHub-method}
\alias{cache<-}
\alias{cache<-,Hub-method}

\alias{hubUrl}
\alias{hubUrl,Hub-method}
\alias{hubCache}
\alias{hubCache,Hub-method}
\alias{hubDate}
\alias{hubDate,Hub-method}
\alias{package}
\alias{package,Hub-method}
\alias{removeCache}
\alias{isLocalHub}
\alias{isLocalHub,Hub-method}
\alias{isLocalHub<-}
\alias{isLocalHub<-,Hub-method}

\alias{possibleDates}
\alias{snapshotDate}
\alias{snapshotDate,Hub-method}
\alias{snapshotDate<-}
\alias{snapshotDate<-,Hub-method}

\alias{removeResources}
\alias{removeResources,missing-method}
\alias{removeResources,character-method}

\alias{dbconn,Hub-method}
\alias{dbfile,Hub-method}
\alias{.db_close}
\alias{recordStatus}
\alias{recordStatus,Hub-method}

% List-like
\alias{length,Hub-method}
\alias{names,Hub-method}
\alias{fileName,Hub-method}

% Subsetting:
\alias{$,Hub-method}

\alias{[[,Hub,character,missing-method}
\alias{[[,Hub,numeric,missing-method}

\alias{[,Hub,character,missing-method}
\alias{[,Hub,logical,missing-method}
\alias{[,Hub,numeric,missing-method}

\alias{[<-,Hub,character,missing,Hub-method}
\alias{[<-,Hub,logical,missing,Hub-method}
\alias{[<-,Hub,numeric,missing,Hub-method}

\alias{subset,Hub-method}

\alias{query}
\alias{query,Hub-method}

% as.list / c
\alias{as.list.Hub}
\alias{as.list,Hub-method}
\alias{c,Hub-method}

% show method:
\alias{show,Hub-method}
\alias{show,AnnotationHubResource-method}


\title{AnnotationHub objects and their related methods and functions}

\description{
  Use \code{AnnotationHub} to interact with Bioconductor's AnnotationHub
  service.  Query the instance to discover and use resources that are of
  interest, and then easily download and import the resource into R for
  immediate use.

  Use \code{AnnotationHub()} to retrieve information about all records
  in the hub. If working offline, add argument \code{localHub=TRUE} to
  work with a local, non-updated hub; it will only have resources
  available that have previously been downloaded. If offline, please
  also see the BiocManager vignette section on offline use to ensure
  proper functionality. To force redownload of the hub,
  \code{refreshHub(hubClass="AnnotationHub")} can be utilized.

  If you are operating behind a proxy please see the AnnotationHub
  Vignette section on "Accessing behind a Proxy" for setting up
  configuration to allow AnnotationHub to run properly.

  Discover records in a hub using \code{mcols()}, \code{query()},
  \code{subset()}, and \code{[}.

  Retrieve individual records using \code{[[}. On first use of a
  resource, the corresponding files or other hub resources are
  downloaded from the internet to a local cache. On this and all
  subsequent uses the files are quickly input from the cache into the R
  session. If a user wants to download the file again and not use the
  cache version add the argument \code{force=TRUE}.

  \code{AnnotationHub} records can be added (and sometimes removed) at
  any time. \code{snapshotDate()} restricts hub records to those
  available at the time of the snapshot. \code{possibleDates()} lists
  snapshot dates valid for the current version of Bioconductor. You can
  check the status of a past record using \code{recordStatus()}.

  The location of the local cache can be found (and updated) with
  \code{getAnnotationHubCache} and \code{setAnnotationHubCache};
  \code{removeCache} removes all cache resources.

  For common hub troubleshooting, please see the AnnotationHub vignette
  \code{vignette("TroubleshootingTheHubs", package="AnnotationHub")}.

}

\section{Constructors}{
  \describe{
    \item{\code{AnnotationHub(..., hub=getAnnotationHubOption("URL"),
	cache=getAnnotationHubOption("CACHE"),
	proxy=getAnnotationHubOption("PROXY"),
	localHub=getAnnotationHubOption("LOCAL"))}:}{

      Create an \code{AnnotationHub} instance, possibly updating the
      current database of records.
    }
  }
}

\section{Accessors}{
  In the code snippets below, \code{x} and \code{object} are
  AnnotationHub objects.

  \describe{
    \item{\code{hubCache(x)}:}{
      Gets the file system location of the local AnnotationHub cache.
    }
    \item{\code{hubUrl(x)}:}{
      Gets the URL for the online hub.
    }
   \item{\code{isLocalHub(x)}:}{
      Get whether or not constructor was called with \code{localHub=TRUE}.
    }
    \item{\code{length(x)}:}{
      Get the number of hub records.
    }
    \item{\code{names(x)}:}{
      Get the names (AnnotationHub unique identifiers, of the form
      AH12345) of the hub records.
    }
    \item{\code{fileName(x)}:}{
      Get the file path of the hub records as stored in the local cache
      (AnnotationHub files are stored as unique numbers, of the form
      12345).  NA is returned for those records which have not been
      cached.
    }
    \item{\code{mcols(x)}:}{
      Get the metadata columns describing each record. Columns include:
      \describe{

	\item{title}{Record title, frequently the file name of the
	  object.}

	\item{dataprovider}{Original provider of the resource, e.g.,
	  Ensembl, UCSC.}

	\item{species}{The species for which the record is most
	  relevant, e.g., \sQuote{Homo sapiens}.}

	\item{taxonomyid}{NCBI taxonomy identifier of the species.}

	\item{genome}{Genome build relevant to the record, e.g., hg19.}

	\item{description}{Textual description of the resource,
	  frequently automatically generated from file path and other
	  information available when the record was created.}

	\item{tags}{Single words added to the record to facilitate
	  identification, e.g., TCGA, Roadmap.}

	\item{rdataclass}{The class of the R object used to represent
	  the object when imported into R, e.g., \code{GRanges},
	  \code{VCFFile}.}

	\item{sourceurl}{Original URL of the resource.}

	\item{sourcetype}{Format of the original resource, e.g., BED
	  file.}
      }
    }

    \item{\code{dbconn(x)}:}{
      Return an open connection to the underlying SQLite database.}

    \item{\code{dbfile(x)}:}{
      Return the full path to the underlying SQLite database.}

    \item{\code{.db_close(conn)}:}{
      Close the SQLite connection \code{conn} returned by \code{dbconn(x)}.}

  }
}

\section{Subsetting and related operations}{
  In the code snippets below, \code{x} is an AnnotationHub object.

  \describe{
    \item{\code{x$name}:}{
      Convenient reference to individual metadata columns, e.g.,
      \code{x$species}.
    }
    \item{\code{x[i]}:}{
      Numerical, logical, or character vector (of AnnotationHub names)
      to subset the hub, e.g., \code{x[x$species == "Homo sapiens"]}.
    }
    \item{\code{x[[i, force=FALSE, verbose=TRUE]]}:}{
      Numerical or character scalar to retrieve (if necessary) and
      import the resource into R. If a user wants to download the file
      again and not use the cache version add the argument
      \code{force=TRUE}. \code{verbose=FALSE} will quiet status messages.
    }
    \item{\code{query(x, pattern, ignore.case=TRUE, pattern.op= `&`)}:}{
      Return an AnnotationHub subset containing only those elements
      whose metadata matches \code{pattern}. Matching uses
      \code{pattern} as in \code{\link{grepl}} to search the
      \code{as.character} representation of each column, performing a
      logical \code{`&`} across columns.
      e.g., \code{query(x, c("Homo sapiens", "hg19", "GTF"))}.
      \describe{
	\item{\code{pattern}}{A character vector of patterns to search
	  (via \code{grepl}) for in any of the \code{mcols()} columns.}
	\item{\code{ignore.case}}{A logical(1) vector indicating whether
	  the search should ignore case (TRUE) or not (FALSE).}
	\item{\code{pattern.op}}{Any function of two arguments,
	  describing how matches across pattern elements are to be
	  combined. The default \code{`&`} requires that only records
	  with \emph{all} elements of \code{pattern} in their metadata
	  columns are returned. \code{`&`}, \code{`|`} and \code{`!`}
	  are most notably available. See \code{?"&"} or
	  \code{?base::Ops} for more information.}
      }
    }
    \item{\code{subset(x, subset)}:}{
      Return the subset of records containing only those elements whose
      metadata satisfies the \emph{expression} in \code{subset}. The
      expression can reference columns of \code{mcols(x)}, and should
      return a logical vector of length \code{length(x)}.
      e.g., \code{subset(x, species == "Homo sapiens" &
	genome=="GRCh38")}.
    }
    \item{\code{recordStatus(hub, record)}:}{
      Returns a \code{data.frame} of the record id and status. \code{hub} must
      be a \code{Hub} object and \code{record} must be a \code{character(1)}.
      Can be used to discover why a resource was removed from the hub.
    }
  }
}

\section{Cache and hub management}{
  In the code snippets below, \code{x} is an AnnotationHub object.
  \describe{
    \item{\code{snapshotDate(x)}:}{ and \code{snapshotDate(x) <- value}:
      Gets or sets the date for the snapshot in use. \code{value} should
      be one of \code{possibleDates()}.
    }
    \item{\code{possibleDates(x)}:}{
      Lists the valid snapshot dates for the version of Bioconductor that
      is being run (e.g., BiocManager::version()).
    }
    \item{\code{cache(x)}:}{ and \code{cache(x) <- NULL}: Adds (downloads) all
      resources in \code{x}, or removes all local resources
      corresponding to the records in \code{x} from the cache. In the latter case,
      \code{x} would typically be a small subset of AnnotationHub
      resources. If \code{x} is a subset hub from a larger hub, and
      \code{localHub=TRUE} was used to construct the hubs, 
      the original object will need to be reconstructed to reflect the
      removed resources. See also \code{removeResources} for a nicer interface
      for removing cached resources, or \code{removeCache} for deleting the hub
      cache entirely.
    }
    \item{\code{hubUrl(x)}:}{
      Gets the URL for the online AnnotationHub.
    }
    \item{\code{hubCache(x)}:}{
      Gets the file system location of the local AnnotationHub cache.
    }
    \item{\code{refreshHub(..., hub, cache, proxy,
	hubClass=c("AnnotationHub", "ExperimentHub"))}:}{
      Force redownload of Hub sqlite file. This returns a Hub object as
      if calling the constructor (i.e., \code{AnnotationHub()}). To force
      redownload specifically for AnnotationHub, the base call should be
      \code{refreshHub(hubClass="AnnotationHub")}.
    }
    \item{\code{removeResources(hub, ids)}:}{
      Removes listed ids from the local cache. ids are "AH" ids. Returns
      an updated hub object. To work with the updated hub object, the
      suggested syntax is to reassign (i.e., \code{hub = removeResources(hub,
      "AH1")}). If \code{ids} is missing, all previously downloaded
      local resources are removed.
    }
    \item{\code{removeCache(x, ask=TRUE)}:}{
      Removes local AnnotationHub database and all related resources. After
      calling this function, the user will have to download any AnnotationHub
      resources again.
    }
  }
}

\section{Coercion}{
  In the code snippets below, \code{x} is an AnnotationHub object.
  \describe{
    \item{\code{as.list(x)}:}{
      Coerce x to a list of hub instances, one entry per
      element. Primarily for internal use.
    }
    \item{\code{c(x, ...)}:}{
      Concatenate one or more sub-hubs. Sub-hubs must reference the same
      AnnotationHub instance. Duplicate entries are removed.
    }
  }
}

\author{Martin Morgan, Marc Carlson, Sonali Arora, Dan Tenenbaum, and
  Lori Shepherd}

\examples{
  ## create an AnnotationHub object
  library(AnnotationHub)
  ah = AnnotationHub()

  ## Summary of available records
  ah

  ## Detail for a single record
  ah[1]

  ## and what is the date we are using?
  snapshotDate(ah)

  ## how many resources?
  length(ah)

  ## from which resources, is data available?
  head(sort(table(ah$dataprovider), decreasing=TRUE))

  ## from which species, is data available ?
  head(sort(table(ah$species),decreasing=TRUE))

  ## what web service and local cache does this AnnotationHub point to?
  hubUrl(ah)
  hubCache(ah)

  ### Examples ###

  ## One can search the hub for multiple strings
  ahs2 <- query(ah, c("GTF", "77","Ensembl", "Homo sapiens"))

  ## information about the file can be retrieved using
  ahs2[1]

  ## one can further extract information from this show method
  ## like the sourceurl using:
  ahs2$sourceurl
  ahs2$description
  ahs2$title

  ## We can download a file by name like this (using a list semantic):
  gr <- ahs2[[1]]
  ## And we can also extract it by the names like this:
  res <- ah[["AH28812"]]

  ## the gtf file is returned as a GenomicRanges object and contains
  ## data about which organism it belongs to, its seqlevels and seqlengths
  seqinfo(gr)

  ## each GenomicRanges contains a metadata slot which can be used to get
  ## the name of the hub object and other associated metadata.
  metadata(gr)
  ah[metadata(gr)$AnnotationHubName]

  ## And we can also use "[" to restrict the things that are in the
  ## AnnotationHub object (by position, character, or logical vector).
  ## Here is a demo of position:
  subHub <- ah[1:3]

  ## recordStatus
  recordStatus(ah, "TEST")
  recordStatus(ah, "AH7220")
}
\seealso{\code{\link{getInfoOnIds}}}
\keyword{classes}
\keyword{methods}