\name{AnnotationHub-objects}
\docType{class}
% Classes
\alias{class:AnnotationHub}
\alias{AnnotationHub-class}
\alias{class:Hub}
\alias{Hub-class}
% Constructor
\alias{.Hub}
\alias{AnnotationHub}
\alias{refreshHub}
% Accessor-like methods
\alias{mcols,Hub-method}
\alias{cache}
\alias{cache,Hub-method}
\alias{cache,AnnotationHub-method}
\alias{cache<-}
\alias{cache<-,Hub-method}
\alias{hubUrl}
\alias{hubUrl,Hub-method}
\alias{hubCache}
\alias{hubCache,Hub-method}
\alias{hubDate}
\alias{hubDate,Hub-method}
\alias{package}
\alias{package,Hub-method}
\alias{removeCache}
\alias{isLocalHub}
\alias{isLocalHub,Hub-method}
\alias{isLocalHub<-}
\alias{isLocalHub<-,Hub-method}
\alias{possibleDates}
\alias{snapshotDate}
\alias{snapshotDate,Hub-method}
\alias{snapshotDate<-}
\alias{snapshotDate<-,Hub-method}
\alias{removeResources}
\alias{removeResources,missing-method}
\alias{removeResources,character-method}
\alias{dbconn,Hub-method}
\alias{dbfile,Hub-method}
\alias{.db_close}
\alias{recordStatus}
\alias{recordStatus,Hub-method}
% List-like
\alias{length,Hub-method}
\alias{names,Hub-method}
\alias{fileName,Hub-method}
% Subsetting:
\alias{$,Hub-method}
\alias{[[,Hub,character,missing-method}
\alias{[[,Hub,numeric,missing-method}
\alias{[,Hub,character,missing-method}
\alias{[,Hub,logical,missing-method}
\alias{[,Hub,numeric,missing-method}
\alias{[<-,Hub,character,missing,Hub-method}
\alias{[<-,Hub,logical,missing,Hub-method}
\alias{[<-,Hub,numeric,missing,Hub-method}
\alias{subset,Hub-method}
\alias{query}
\alias{query,Hub-method}
% as.list / c
\alias{as.list.Hub}
\alias{as.list,Hub-method}
\alias{c,Hub-method}
% show method:
\alias{show,Hub-method}
\alias{show,AnnotationHubResource-method}
\title{AnnotationHub objects and their related methods and functions}
\description{
Use \code{AnnotationHub} to interact with Bioconductor's AnnotationHub
service. Query the instance to discover and use resources that are of
interest, and then easily download and import the resource into R for
immediate use.
Use \code{AnnotationHub()} to retrieve information about all records
in the hub. If working offline, add argument \code{localHub=TRUE} to
work with a local, non-updated hub; it will only have resources
available that have previously been downloaded. If offline, please
also see the BiocManager vignette section on offline use to ensure proper
functionality. To force redownload of the hub,
\code{refreshHub(hubClass="AnnotationHub")} can be utilized.
If you are operating behind a proxy please see the AnnotationHub
Vignette section on "Accessing behind a Proxy" for setting up
configuration to allow AnnotationHub to run properly.
Discover records in a hub using \code{mcols()}, \code{query()},
\code{subset()}, and \code{[}.
Retrieve individual records using \code{[[}. On first use of a
resource, the corresponding files or other hub resources are
downloaded from the internet to a local cache. On this and all
subsequent uses the files are quickly input from the cache into the R
session. If a user wants to download the file again and not use the
cached version, add the argument \code{force=TRUE}.
\code{AnnotationHub} records can be added (and sometimes removed) at
any time. \code{snapshotDate()} restricts hub records to those
available at the time of the snapshot. \code{possibleDates()} lists
snapshot dates valid for the current version of Bioconductor. You can
check the status of a past record using \code{recordStatus()}.
The location of the local cache can be found (and updated) with
\code{getAnnotationHubCache} and \code{setAnnotationHubCache};
\code{removeCache} removes all cache resources.
For common hub troubleshooting, please see the AnnotationHub vignette
entitled \code{vignette("TroubleshootingTheHubs", package="AnnotationHub")}.
}
\section{Constructors}{
\describe{
\item{\code{AnnotationHub(..., hub=getAnnotationHubOption("URL"),
cache=getAnnotationHubOption("CACHE"),
proxy=getAnnotationHubOption("PROXY"),
localHub=getAnnotationHubOption("LOCAL"))}:}{
Create an \code{AnnotationHub} instance, possibly updating the
current database of records.
}
}
}
\section{Accessors}{
In the code snippets below, \code{x} and \code{object} are
AnnotationHub objects.
\describe{
\item{\code{hubCache(x)}:}{
Gets the file system location of the local AnnotationHub cache.
}
\item{\code{hubUrl(x)}:}{
Gets the URL for the online hub.
}
\item{\code{isLocalHub(x)}:}{
Get whether or not constructor was called with \code{localHub=TRUE}.
}
\item{\code{length(x)}:}{
Get the number of hub records.
}
\item{\code{names(x)}:}{
Get the names (AnnotationHub unique identifiers, of the form
AH12345) of the hub records.
}
\item{\code{fileName(x)}:}{
Get the file path of the hub records as stored in the local cache
(AnnotationHub files are stored as unique numbers, of the form
12345). NA is returned for those records which have not been
cached.
}
\item{\code{mcols(x)}:}{
Get the metadata columns describing each record. Columns include:
\describe{
\item{title}{Record title, frequently the file name of the
object.}
\item{dataprovider}{Original provider of the resource, e.g.,
Ensembl, UCSC.}
\item{species}{The species for which the record is most
relevant, e.g., \sQuote{Homo sapiens}.}
\item{taxonomyid}{NCBI taxonomy identifier of the species.}
\item{genome}{Genome build relevant to the record, e.g., hg19.}
\item{description}{Textual description of the resource,
frequently automatically generated from file path and other
information available when the record was created.}
\item{tags}{Single words added to the record to facilitate
identification, e.g., TCGA, Roadmap.}
\item{rdataclass}{The class of the R object used to represent
the object when imported into R, e.g., \code{GRanges},
\code{VCFFile}.}
\item{sourceurl}{Original URL of the resource.}
\item{sourcetype}{Format of the original resource, e.g., BED
file.}
}
}
\item{\code{dbconn(x)}:}{
Return an open connection to the underlying SQLite database.}
\item{\code{dbfile(x)}:}{
Return the full path to the underlying SQLite database.}
\item{\code{.db_close(conn)}:}{
Close the SQLite connection \code{conn} returned by \code{dbconn(x)}.}
}
}
\section{Subsetting and related operations}{
In the code snippets below, \code{x} is an AnnotationHub object.
\describe{
\item{\code{x$name}:}{
Convenient reference to individual metadata columns, e.g.,
\code{x$species}.
}
\item{\code{x[i]}:}{
Numerical, logical, or character vector (of AnnotationHub names)
to subset the hub, e.g., \code{x[x$species == "Homo sapiens"]}.
}
\item{\code{x[[i, force=FALSE, verbose=TRUE]]}:}{
Numerical or character scalar to retrieve (if necessary) and
import the resource into R. If a user wants to download the file
again and not use the cache version add the argument
\code{force=TRUE}. \code{verbose=FALSE} will quiet status messages.
}
\item{\code{query(x, pattern, ignore.case=TRUE, pattern.op= `&`)}:}{
Return an AnnotationHub subset containing only those elements
whose metadata matches \code{pattern}. Matching uses
\code{pattern} as in \code{\link{grepl}} to search the
\code{as.character} representation of each column, performing a
logical \code{`&`} across columns.
e.g., \code{query(x, c("Homo sapiens", "hg19", "GTF"))}.
\describe{
\item{\code{pattern}}{A character vector of patterns to search
(via \code{grepl}) for in any of the \code{mcols()} columns.}
\item{\code{ignore.case}}{A logical(1) vector indicating whether
the search should ignore case (TRUE) or not (FALSE).}
\item{\code{pattern.op}}{Any function of two arguments,
describing how matches across pattern elements are to be
combined. The default \code{`&`} requires that only records
with \emph{all} elements of \code{pattern} in their metadata
columns are returned. \code{`&`}, \code{`|`} and \code{`!`}
are most notably available. See \code{"?&"} or
\code{?base::Ops} for more information.}
}
}
\item{\code{subset(x, subset)}:}{
Return the subset of records containing only those elements whose
metadata satisfies the \emph{expression} in \code{subset}. The
expression can reference columns of \code{mcols(x)}, and should
return a logical vector of length \code{length(x)}.
e.g., \code{subset(x, species == "Homo sapiens" &
genome=="GRCh38")}.
}
\item{\code{recordStatus(hub, record)}:}{
Returns a \code{data.frame} of the record id and status. \code{hub} must
be a \code{Hub} object and \code{record} must be a \code{character(1)}.
Can be used to discover why a resource was removed from the hub.
}
}
}
\section{Cache and hub management}{
In the code snippets below, \code{x} is an AnnotationHub object.
\describe{
\item{\code{snapshotDate(x)}:}{ and \code{snapshotDate(x) <- value}:
Gets or sets the date for the snapshot in use. \code{value} should
be one of \code{possibleDates()}.
}
\item{\code{possibleDates(x)}:}{
Lists the valid snapshot dates for the version of Bioconductor that
is being run (e.g., BiocManager::version()).
}
\item{\code{cache(x)}:}{ and \code{cache(x) <- NULL}: Adds (downloads) all
resources in \code{x}, or removes all local resources
corresponding to the records in \code{x} from the cache. In the latter case,
\code{x} would typically be a small subset of AnnotationHub
resources. If \code{x} is a subset hub from a larger hub, and
\code{localHub=TRUE} was used to construct the hubs,
the original object will need to be reconstructed to reflect the
removed resources. See also \code{removeResources} for a nicer interface
for removing cached resources, or \code{removeCache} for deleting the hub
cache entirely.
}
\item{\code{hubUrl(x)}:}{
Gets the URL for the online AnnotationHub.
}
\item{\code{hubCache(x)}:}{
Gets the file system location of the local AnnotationHub cache.
}
\item{\code{refreshHub(..., hub, cache, proxy,
hubClass=c("AnnotationHub", "ExperimentHub"))}:}{
Force redownload of the Hub sqlite file. This returns a Hub object as
if calling the constructor (i.e., \code{AnnotationHub()}). To force a
redownload specifically for AnnotationHub, the base call should be
\code{refreshHub(hubClass="AnnotationHub")}.
}
\item{\code{removeResources(hub, ids)}:}{
Removes the listed ids from the local cache. ids are "AH" ids. Returns
an updated hub object. To work with the updated hub object, the suggested
syntax is to reassign (i.e., \code{hub = removeResources(hub,
"AH1")}). If ids are missing, all previously downloaded
local resources will be removed.
}
\item{\code{removeCache(x, ask=TRUE)}:}{
Removes local AnnotationHub database and all related resources. After
calling this function, the user will have to download any AnnotationHub
resources again.
}
}
}
\section{Coercion}{
In the code snippets below, \code{x} is an AnnotationHub object.
\describe{
\item{\code{as.list(x)}:}{
Coerce x to a list of hub instances, one entry per
element. Primarily for internal use.
}
\item{\code{c(x, ...)}:}{
Concatenate one or more sub-hubs. Sub-hubs must reference the same
AnnotationHub instance. Duplicate entries are removed.
}
}
}
\author{Martin Morgan, Marc Carlson, Sonali Arora, Dan Tenenbaum, and
Lori Shepherd}
\examples{
## create an AnnotationHub object
library(AnnotationHub)
ah = AnnotationHub()
## Summary of available records
ah
## Detail for a single record
ah[1]
## and what is the date we are using?
snapshotDate(ah)
## how many resources?
length(ah)
## from which resources, is data available?
head(sort(table(ah$dataprovider), decreasing=TRUE))
## from which species, is data available ?
head(sort(table(ah$species),decreasing=TRUE))
## what web service and local cache does this AnnotationHub point to?
hubUrl(ah)
hubCache(ah)
### Examples ###
## One can search the hub for multiple strings
ahs2 <- query(ah, c("GTF", "77","Ensembl", "Homo sapiens"))
## information about the file can be retrieved using
ahs2[1]
## one can further extract information from this show method
## like the sourceurl using:
ahs2$sourceurl
ahs2$description
ahs2$title
## We can download a file by name like this (using a list semantic):
gr <- ahs2[[1]]
## And we can also extract it by the names like this:
res <- ah[["AH28812"]]
## the gtf file is returned as a GenomicRanges object and contains
## data about which organism it belongs to, its seqlevels and seqlengths
seqinfo(gr)
## each GenomicRanges contains a metadata slot which can be used to get
## the name of the hub object and other associated metadata.
metadata(gr)
ah[metadata(gr)$AnnotationHubName]
## And we can also use "[" to restrict the things that are in the
## AnnotationHub object (by position, character, or logical vector).
## Here is a demo of position:
subHub <- ah[1:3]
## recordStatus
recordStatus(ah, "TEST")
recordStatus(ah, "AH7220")
}
\seealso{\code{\link{getInfoOnIds}}}
\keyword{classes}
\keyword{methods}