File: AllGenerics.R

package info (click to toggle)
r-bioc-alabaster.base 1.6.1%2Bds-2
  • links: PTS, VCS
  • area: main
  • in suites: sid, trixie
  • size: 1,652 kB
  • sloc: cpp: 11,377; sh: 29; makefile: 2
file content (205 lines) | stat: -rw-r--r-- 9,384 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
#' Save objects to disk
#'
#' Generic to save assorted R objects into appropriate on-disk representations.
#' More methods may be defined by other packages to extend the \pkg{alabaster.base} framework to new classes.
#'
#' @param x A Bioconductor object of the specified class.
#' @param path String containing the path to a directory in which to save \code{x}.
#' @param ... Additional named arguments to pass to specific methods.
#'
#' @return 
#' \code{path} is created and populated with files containing the contents of \code{x}.
#' \code{NULL} should be invisibly returned.
#'
#' @section Comments for extension developers:
#' Methods for the \code{saveObject} generic should create a directory at \code{path} in which the contents of \code{x} are to be saved.
#' The files may consist of any format, though language-agnostic formats like HDF5, CSV, JSON are preferred.
#' For more complex objects, multiple files and subdirectories may be created within \code{path}. 
#' The only strict requirements are:
#' \itemize{
#' \item There must be an \code{OBJECT} file inside \code{path}, 
#' containing a JSON object with a \code{"type"} string property that specifies the class of the object, e.g., \code{"data_frame"}, \code{"summarized_experiment"}.
#' This will be used by loading functions to determine how to load the files into memory.
#' \item The names of files and subdirectories should not start with \code{_} or \code{.}.
#' These are reserved for applications, e.g., to build manifests or to store additional metadata.
#' }
#'
#' Callers can pass optional parameters to specific \code{saveObject} methods via \code{...}.
#' Any options recognized by a method should be prefixed by the name of the class used in the method's signature,
#' e.g., any options for \code{\link{saveObject,DataFrame-method}} should start with \code{DataFrame.}.
#' This scoping avoids conflicts between otherwise identically-named options of different methods.
#'
#' When developing \code{saveObject} methods of complex objects, a simple approach is to decompose \code{x} into its \dQuote{child} components.
#' Each component can then be saved into a subdirectory of \code{path}, leveraging the existing \code{saveObject} methods for the component classes.
#' In such cases, extension developers should actually call \code{\link{altSaveObject}} on each child component, rather than calling \link{saveObject} directly.
#' This ensures that any application-level overrides of the saving functions are respected.
#' It is expected that each method will forward \code{...} (possibly after modification) to any internal \code{\link{altSaveObject}} calls.
#'
#' @section Comments for application developers:
#' Application developers can override \code{saveObject} by specifying a custom function in \code{\link{altSaveObject}}.
#' This can be used to point to a different function to handle the saving process for each class.
#' The custom function can be as simple as a wrapper around \code{saveObject} with some additional actions (e.g., to save more metadata),
#' or may be as complex as a full-fledged generic with its own methods for class-specific customizations.
#'
#' @author Aaron Lun
#' @examples
#' library(S4Vectors)
#' X <- DataFrame(X=LETTERS, Y=sample(3, 26, replace=TRUE))
#' 
#' tmp <- tempfile()
#' saveObject(X, tmp)
#' list.files(tmp, recursive=TRUE)
#'
#' @export
#' @aliases 
#' stageObject stageObject,ANY-method
#' searchForMethods .searchForMethods
#' @import methods
#' @importFrom jsonlite fromJSON
setGeneric("saveObject", function(x, path, ...) {
    # Non-standard generic: wraps the S4 dispatch with a pre-flight check on
    # 'path', a lookup for better-suited methods, and a validation pass that
    # only runs for the outermost call.

    # Never write on top of something that is already on disk.
    if (file.exists(path)) {
        stop("cannot save ", class(x)[1], " at existing path '", path, "'")
    }

    # Subclasses may have more specific saveObject methods living in packages
    # that are not loaded yet. If the search reports success, dispatch directly
    # on the concrete class of 'x' and hand control to whatever was found.
    if (.search_methods(x)) {
        found <- selectMethod("saveObject", class(x)[1], optional=TRUE)
        if (!is.null(found)) {
            return(found(x, path, ...))
        }
    }

    # A parent object is responsible for validating its children, so nested
    # calls skip validation. The shared flag is raised for the duration of the
    # outermost call and lowered again on exit, even if the method fails.
    outermost <- !is_nested$status
    if (outermost) {
        is_nested$status <- TRUE
        on.exit(is_nested$status <- FALSE, add=TRUE, after=FALSE)
    }

    standardGeneric("saveObject")

    # Validate what was just written, but only from the top-level call.
    if (outermost) {
        validateObject(path)
    }
})

# Mutable flag shared by all saveObject() calls; 'status' is TRUE while an
# outermost saveObject() call is in progress and FALSE otherwise.
is_nested <- new.env()
assign("status", FALSE, envir=is_nested)

#######################################
########### OLD STUFF HERE ############
#######################################

#' @export
setGeneric("stageObject", function(x, dir, path, child=FALSE, ...) {
    # Legacy staging generic. Before dispatching to a method it checks that:
    #  - nothing already exists at 'path' inside 'dir' (unless 'path' is "."),
    #  - 'path' contains no backslashes,
    #  - a non-child object is not being placed inside a directory that holds
    #    another object's (non-redirection) JSON metadata.
    #
    # x:     object to stage.
    # dir:   staging directory.
    # path:  location of the object, relative to 'dir'.
    # child: whether 'x' is being staged as a child of another object, in which
    #        case the parent-directory check is skipped.
    # ...:   forwarded to the selected method.
    if (path != ".") {
        full.path <- file.path(dir, path)
        if (file.exists(full.path)) {
            stop("cannot stage ", class(x)[1], " at existing path '", full.path, "'")
        }
    }
    if (grepl("\\\\", path)) {
        stop("Windows-style path separators are not allowed")
    }

    if (!child) {
        # Walk up through every ancestor directory of 'path', looking for JSON
        # metadata files that do not describe a redirection.
        parent <- dirname(path)
        while (parent != ".") {
            ppath <- file.path(dir, parent)
            candidates <- list.files(ppath, pattern="\\.json$")
            for (can in candidates) {
                # NOTE(review): assumes every JSON file here carries a string
                # "$schema" property; startsWith() fails otherwise.
                schema <- fromJSON(file.path(ppath, can), simplifyVector=FALSE)[["$schema"]]
                if (!startsWith(schema, "redirection/")) {
                    stop("cannot save a non-child object inside another object's subdirectory at '", parent, "'")
                }
            }

            # dirname() of a filesystem root returns the root itself and never
            # reaches ".", so stop once the walk no longer makes progress.
            # Otherwise an absolute 'path' would loop forever.
            grandparent <- dirname(parent)
            if (identical(grandparent, parent)) {
                break
            }
            parent <- grandparent
        }
    }

    # Need to search here to pick up any subclasses that might have better
    # stageObject methods in yet-to-be-loaded packages.
    if (.search_methods(x)) {
        fun <- selectMethod("stageObject", class(x)[1], optional=TRUE)
        if (!is.null(fun)) {
            return(fun(x, dir, path, child=child, ...))
        }
    }

    standardGeneric("stageObject")
})

#' Acquire file or metadata
#'
#' \emph{WARNING: these functions are deprecated. 
#' Applications are expected to handle acquisition of files before loaders are called.}
#' Acquire a file or metadata for loading.
#' As one might expect, these are typically used inside a \code{load*} function.
#'
#' @param project Any value specifying the project of interest.
#' The default methods expect a string containing a path to a staging directory,
#' but other objects can be used to control dispatch.
#' @param path String containing a relative path to a resource inside the staging directory.
#'
#' @return
#' \code{acquireFile} methods return a local path to the file corresponding to the requested resource.
#'
#' \code{acquireMetadata} methods return a named list of metadata for the requested resource.
#' 
#' @details
#' By default, files and metadata are loaded from the same staging directory that is written to by \code{\link{stageObject}}.
#' alabaster applications can define custom methods to obtain the files and metadata from a different location, e.g., remote databases.
#' This is achieved by dispatching on a different class of \code{project}.
#'
#' Each custom acquisition method should take two arguments.
#' The first argument is an R object representing some concept of a \dQuote{project}.
#' In the default case, this is a string containing a path to the staging directory representing the project.
#' However, it can be anything, e.g., a number containing a database identifier, a list of identifiers and versions, and so on -
#' as long as the custom acquisition method is capable of understanding it, the \code{load*} functions don't care.
#'
#' The second argument is a string containing the relative path to the resource inside that project.
#' This should be the path to a specific file inside the project, not the subdirectory containing the file.
#' More concretely, it should be equivalent to the \code{path} in the \emph{output} of \code{\link{stageObject}},
#' not the path to the subdirectory used as the input to the same function.
#'
#' The return value for each custom acquisition function should be the same as their local counterparts.
#' That is, any custom file acquisition function should return a file path,
#' and any custom metadata acquisition function should return a named list of metadata.
#'
#' @author Aaron Lun
#' @examples
#' # Staging an example DataFrame:
#' library(S4Vectors)
#' df <- DataFrame(A=1:10, B=LETTERS[1:10])
#' tmp <- tempfile()
#' dir.create(tmp)
#' info <- stageObject(df, tmp, path="coldata")
#' writeMetadata(info, tmp)
#'
#' # Retrieving the metadata:
#' meta <- acquireMetadata(tmp, "coldata/simple.csv.gz")
#' str(meta)
#'
#' # Retrieving the file:
#' acquireFile(tmp, "coldata/simple.csv.gz")
#'
#' @export
#' @name acquireFile
setGeneric(
    name="acquireFile",
    # The body is deliberately a bare standardGeneric() call (no braces), so
    # that setGeneric() treats this as a standard generic.
    def=function(project, path) standardGeneric("acquireFile")
)

#' @export
#' @rdname acquireFile
setGeneric("acquireMetadata", function(project, path) {
    # Dispatch to the method for this kind of 'project' first; everything below
    # post-processes the metadata that the method returned.
    meta <- standardGeneric("acquireMetadata")

    # Anything that is not a redirection is returned untouched.
    schema.dir <- dirname(meta[["$schema"]])
    if (schema.dir != "redirection") {
        return(meta)
    }

    # Only the first redirection target is followed, and only if it is local.
    # Applications that need other redirection types are expected to resolve
    # them inside their own acquireMetadata method.
    target <- meta[["redirection"]][["targets"]][[1]]
    if (target$type != "local") {
        stop("unknown redirection type '", target$type, "' to '", target$location, "'")
    }

    # Re-enter the generic so that the target's own metadata is resolved in
    # the same way.
    resolved <- acquireMetadata(project, target$location)
    resolved
})