File: dump-management.Rd

package info (click to toggle)
r-bioc-hdf5array 1.34.0-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 8,736 kB
  • sloc: ansic: 5,815; makefile: 4
file content (263 lines) | stat: -rw-r--r-- 9,024 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
\name{HDF5-dump-management}

\alias{HDF5-dump-management}
\alias{dump-management}

\alias{setHDF5DumpDir}
\alias{getHDF5DumpDir}

\alias{setHDF5DumpFile}
\alias{getHDF5DumpFile}
\alias{lsHDF5DumpFile}

\alias{setHDF5DumpName}
\alias{getHDF5DumpName}

\alias{setHDF5DumpChunkLength}
\alias{getHDF5DumpChunkLength}
\alias{setHDF5DumpChunkShape}
\alias{getHDF5DumpChunkShape}
\alias{getHDF5DumpChunkDim}

\alias{setHDF5DumpCompressionLevel}
\alias{getHDF5DumpCompressionLevel}

\alias{appendDatasetCreationToHDF5DumpLog}
\alias{showHDF5DumpLog}

\title{HDF5 dump management}

\description{
  A set of utilities to control the location and physical properties of
  automatically created HDF5 datasets.
}

\usage{
setHDF5DumpDir(dir)
setHDF5DumpFile(filepath)
setHDF5DumpName(name)
setHDF5DumpChunkLength(length=1000000L)
setHDF5DumpChunkShape(shape="scale")
setHDF5DumpCompressionLevel(level=6L)

getHDF5DumpDir()
getHDF5DumpFile()
getHDF5DumpName(for.use=FALSE)
getHDF5DumpChunkLength()
getHDF5DumpChunkShape()
getHDF5DumpCompressionLevel()

lsHDF5DumpFile()

showHDF5DumpLog()

## For developers:
getHDF5DumpChunkDim(dim)
appendDatasetCreationToHDF5DumpLog(filepath, name, dim, type,
                                   chunkdim, level)
}

\arguments{
  \item{dir}{
    The path (as a single string) to the current \emph{HDF5 dump directory},
    that is, to the (new or existing) directory where \emph{HDF5 dump files}
    with automatic names will be created. This is ignored if the user
    specified an \emph{HDF5 dump file} with \code{setHDF5DumpFile}.
    If \code{dir} is missing, then the \emph{HDF5 dump directory} is set
    back to its default value i.e. to some directory under \code{tempdir()}
    (call \code{getHDF5DumpDir()} to get the exact path).
  }
  \item{filepath}{
    For \code{setHDF5DumpFile}:
    The path (as a single string) to the current \emph{HDF5 dump file},
    that is, to the (new or existing) HDF5 file where the \emph{next
    automatic HDF5 datasets} will be written. If \code{filepath} is
    missing, then a new file with an automatic name will be created
    (in \code{getHDF5DumpDir()}) and used for each new dataset.

    For \code{appendDatasetCreationToHDF5DumpLog}:
    See the Note TO DEVELOPERS below.
  }
  \item{name}{
    For \code{setHDF5DumpName}:
    The name of the \emph{next automatic HDF5 dataset} to be written to
    the current \emph{HDF5 dump file}.

    For \code{appendDatasetCreationToHDF5DumpLog}:
    See the Note TO DEVELOPERS below.
  }
  \item{length}{
    The maximum length of the physical chunks of the \emph{next automatic
    HDF5 dataset} to be written to the current \emph{HDF5 dump file}.
  }
  \item{shape}{
    A string specifying the shape of the physical chunks of the \emph{next
    automatic HDF5 dataset} to be written to the current \emph{HDF5 dump file}.
    See \code{\link[DelayedArray]{makeCappedVolumeBox}} in the
    \pkg{DelayedArray} package for a description of the supported
    shapes.
  }
  \item{level}{
    For \code{setHDF5DumpCompressionLevel}:
    The compression level to use for writing \emph{automatic HDF5 datasets}
    to disk.
    See the \code{level} argument in \code{?rhdf5::\link{h5createDataset}}
    (in the \pkg{rhdf5} package) for more information about this.

    For \code{appendDatasetCreationToHDF5DumpLog}:
    See the Note TO DEVELOPERS below.
  }
  \item{for.use}{
    Whether the returned dataset name is for use by the caller or not.
    See the Note TO DEVELOPERS below for the details.
  }
  \item{dim}{
    The dimensions of the HDF5 dataset to be written to disk, that is,
    an integer vector of length one or more giving the maximal indices in each
    dimension.
    See the \code{dims} argument in \code{?rhdf5::\link{h5createDataset}}
    (in the \pkg{rhdf5} package) for more information about this.
  }
  \item{type}{
    The type (a.k.a. storage mode) of the data to be written to disk. Can be
    obtained with \code{\link[DelayedArray]{type}()} on an array-like object
    (which is equivalent to \code{storage.mode()} or \code{typeof()} on an
    ordinary array). This is typically what an application writing datasets
    to the \emph{HDF5 dump} should pass to the \code{storage.mode} argument
    of its call to \code{rhdf5::\link{h5createDataset}}.
    See the Note TO DEVELOPERS below for more information.
  }
  \item{chunkdim}{
    The dimensions of the chunks.
  }
}

\details{
  Calling \code{getHDF5DumpFile()} and \code{getHDF5DumpName()} with no
  argument should be \emph{informative} only i.e. it's a means for the user
  to know where the \emph{next automatic HDF5 dataset} will be written.
  Since a given file/name combination can be used only once, the user should
  be careful to not use that combination to explicitly create an HDF5 dataset
  because that would get in the way of the creation of the \emph{next
  automatic HDF5 dataset}.
  See the Note TO DEVELOPERS below if you actually need to use this file/name
  combination.

  \code{lsHDF5DumpFile()} is just a convenience wrapper for
  \code{\link{h5ls}(getHDF5DumpFile())}.
}

\value{
  \code{getHDF5DumpDir} returns the absolute path to the directory
  where \emph{HDF5 dump files} with automatic names will be created.
  Only meaningful if the user did NOT specify an \emph{HDF5 dump file}
  with \code{setHDF5DumpFile}.

  \code{getHDF5DumpFile} returns the absolute path to the HDF5 file where
  the \emph{next automatic HDF5 dataset} will be written.

  \code{getHDF5DumpName} returns the name of the \emph{next automatic HDF5
  dataset}.

  \code{getHDF5DumpCompressionLevel} returns the compression level currently
  used for writing \emph{automatic HDF5 datasets} to disk.

  \code{showHDF5DumpLog} returns the dump log in an invisible data frame.

  \code{getHDF5DumpChunkDim} returns the dimensions of the physical chunks
  that will be used to write the dataset to disk.
}

\note{
  TO DEVELOPERS:

  If your application needs to write its own dataset to the \emph{HDF5 dump}
  then it should:
  \enumerate{
    \item Get a file/dataset name combination by calling
          \code{getHDF5DumpFile()} and \code{getHDF5DumpName(for.use=TRUE)}.

    \item [OPTIONAL] Call \code{getHDF5DumpChunkDim(dim)} to get reasonable
          chunk dimensions to use for writing the dataset to disk. Or choose
          your own chunk dimensions.

    \item Add an entry to the dump log by calling
          \code{appendDatasetCreationToHDF5DumpLog}.
          Typically, this should be done right after creating the dataset
          (e.g. with \code{rhdf5::h5createDataset}) and before starting to
          write the dataset to disk. The values passed to
          \code{appendDatasetCreationToHDF5DumpLog} via the \code{filepath},
          \code{name}, \code{dim}, \code{type}, \code{chunkdim}, and
          \code{level} arguments should be those that were passed to
          \code{rhdf5::h5createDataset} via the \code{file}, \code{dataset},
          \code{dims}, \code{storage.mode}, \code{chunk}, and \code{level}
          arguments, respectively.
          Note that \code{appendDatasetCreationToHDF5DumpLog} uses a lock
          mechanism so it is safe to use in the context of parallel execution.
  }
  This is actually what the coercion method to \link{HDF5Array} does
  internally.
}

\seealso{
  \itemize{
    \item \code{\link{writeHDF5Array}} for writing an array-like object
          to an HDF5 file.

    \item \link{HDF5Array} objects.

    \item The \code{\link{h5ls}} function on which \code{lsHDF5DumpFile}
          is based.

    \item \code{\link[DelayedArray]{makeCappedVolumeBox}} in the
          \pkg{DelayedArray} package.

    \item \code{\link[DelayedArray]{type}} in the \pkg{DelayedArray} package.
  }
}

\examples{
getHDF5DumpDir()
getHDF5DumpFile()

## Use setHDF5DumpFile() to change the current HDF5 dump file.
## If the specified file exists, then it must be in HDF5 format or
## an error will be raised. If it doesn't exist, then it will be
## created.
#setHDF5DumpFile("path/to/some/HDF5/file")

lsHDF5DumpFile()

a <- array(1:600, c(150, 4))
A <- as(a, "HDF5Array")
lsHDF5DumpFile()
A

b <- array(runif(6000), c(4, 2, 150))
B <- as(b, "HDF5Array")
lsHDF5DumpFile()
B

C <- (log(2 * A + 0.88) - 5)^3 * t(B[ , 1, ])
as(C, "HDF5Array")  # realize C on disk
lsHDF5DumpFile()

## Matrix multiplication is not delayed: the output matrix is realized
## block by block. The current "realization backend" controls where
## realization happens e.g. in memory if set to NULL or in an HDF5 file
## if set to "HDF5Array". See '?realize' in the DelayedArray package for
## more information about "realization backends".
setAutoRealizationBackend("HDF5Array")
m <- matrix(runif(20), nrow=4)
P <- C \%*\% m
lsHDF5DumpFile()

## See all the HDF5 datasets created in the current session so far:
showHDF5DumpLog()

## Wrap the call in suppressMessages() if you are only interested in the
## data frame version of the dump log:
dump_log <- suppressMessages(showHDF5DumpLog())
dump_log
}
\keyword{utilities}