1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227
|
\name{writeHDF5Array}
\alias{class:HDF5RealizationSink}
\alias{HDF5RealizationSink-class}
\alias{HDF5RealizationSink}
\alias{dimnames,HDF5RealizationSink-method}
\alias{type,HDF5RealizationSink-method}
\alias{chunkdim,HDF5RealizationSink-method}
\alias{is_sparse,HDF5RealizationSink-method}
\alias{write_block,HDF5RealizationSink-method}
\alias{coerce,HDF5RealizationSink,HDF5ArraySeed-method}
\alias{coerce,HDF5RealizationSink,HDF5Array-method}
\alias{coerce,HDF5RealizationSink,DelayedArray-method}
\alias{writeHDF5Array}
\alias{coerce,ANY,HDF5Array-method}
\alias{coerce,DelayedArray,HDF5Array-method}
\alias{coerce,DelayedMatrix,HDF5Matrix-method}
\title{Write an array-like object to an HDF5 file}
\description{
A function for writing an array-like object to an HDF5 file.
}
\usage{
writeHDF5Array(x, filepath=NULL, name=NULL,
H5type=NULL, chunkdim=NULL, level=NULL, as.sparse=NA,
with.dimnames=TRUE, verbose=NA)
}
\arguments{
\item{x}{
The array-like object to write to an HDF5 file.
If \code{x} is a \link{DelayedArray} object, \code{writeHDF5Array}
\emph{realizes} it on disk, that is, all the delayed operations carried
by the object are executed while the object is written to disk.
See the "On-disk realization of a DelayedArray object as an HDF5 dataset"
section below for more information.
}
\item{filepath}{
\code{NULL} or the path (as a single string) to the (new or existing)
HDF5 file where to write the dataset.
If \code{NULL}, then the dataset will be written to the current \emph{HDF5
dump file}, i.e. to the file whose path is returned by
\code{\link{getHDF5DumpFile}}.
}
\item{name}{
\code{NULL} or the name of the HDF5 dataset to write.
If \code{NULL}, then the name returned by \code{\link{getHDF5DumpName}}
will be used.
}
\item{H5type}{
The H5 datatype to use for the HDF5 dataset to be written to the HDF5
file is automatically inferred from the type of \code{x} (\code{type(x)}).
Advanced users can override this by specifying the H5 datatype they want
via the \code{H5type} argument.
See \code{rhdf5::h5const("H5T")} for a list of available H5 datatypes.
See the References section below for the link to the HDF Group's Support
Portal where H5 predefined datatypes are documented.
A typical use case is to use a datatype that is smaller than the
automatic one in order to reduce the size of the dataset on disk.
For example you could use \code{"H5T_IEEE_F32LE"} when \code{type(x)}
is \code{"double"} and you don't care about preserving the precision
of 64-bit floating-point numbers (the automatic H5 datatype used for
\code{"double"} is \code{"H5T_IEEE_F64LE"}).
Another example is to use \code{"H5T_STD_U16LE"} when \code{x} contains
small non-negative integer values like counts (the automatic H5
datatype used for \code{"integer"} is \code{"H5T_STD_I32LE"}).
}
\item{chunkdim}{
The dimensions of the chunks to use for writing the data to disk.
By default (i.e. when \code{chunkdim} is set to \code{NULL}),
\code{getHDF5DumpChunkDim(dim(x))} will be used.
See \code{?\link{getHDF5DumpChunkDim}} for more information.
Set \code{chunkdim} to 0 to write \emph{unchunked data} (a.k.a.
\emph{contiguous data}).
}
\item{level}{
The compression level to use for writing the data to disk.
By default, \code{getHDF5DumpCompressionLevel()} will be used.
See \code{?\link{getHDF5DumpCompressionLevel}} for more information.
}
\item{as.sparse}{
Whether the data in the returned \link{HDF5Array} object should be
flagged as sparse or not. If set to \code{NA} (the default), then
\code{is_sparse(x)} is used.
IMPORTANT NOTE: This only controls the \code{as.sparse} flag of
the returned \link{HDF5Array} object. See the man page of the
\code{\link{HDF5Array}()} constructor for more information.
In particular this does NOT affect how the data will be laid out
in the HDF5 file in any way (HDF5 doesn't natively support sparse
storage at the moment). In other words, the data will always be
stored in a dense format, even when \code{as.sparse} is set to
\code{TRUE}.
}
\item{with.dimnames}{
Whether the dimnames on \code{x} should also be written to the HDF5
file or not. \code{TRUE} by default.
Note that \code{\link{h5writeDimnames}} is used internally to write
the dimnames to disk. Setting \code{with.dimnames} to \code{FALSE}
and calling \code{\link{h5writeDimnames}} is another way to write
the dimnames on \code{x} to disk that gives more control.
See \code{?\link{h5writeDimnames}} for more information.
}
\item{verbose}{
Whether block processing progress should be displayed or not.
If set to \code{NA} (the default), verbosity is controlled
by \code{DelayedArray:::get_verbose_block_processing()}.
Setting \code{verbose} to \code{TRUE} or \code{FALSE} overrides this.
}
}
\details{
Please note that, depending on the size of the data to write to disk
and the performance of the disk, \code{writeHDF5Array()} can take a
long time to complete. Use \code{verbose=TRUE} to see its progress.
Use \code{\link{setHDF5DumpFile}} and \code{\link{setHDF5DumpName}} to
control the location of automatically created HDF5 datasets.
Use \code{\link{setHDF5DumpChunkLength}},
\code{\link{setHDF5DumpChunkShape}}, and
\code{\link{setHDF5DumpCompressionLevel}} to control the
physical properties of automatically created HDF5 datasets.
}
\value{
An \link{HDF5Array} object pointing to the newly written HDF5 dataset
on disk.
}
\section{On-disk realization of a DelayedArray object as an HDF5 dataset}{
When passed a \link{DelayedArray} object, \code{writeHDF5Array}
\emph{realizes} it on disk, that is, all the delayed operations carried
by the object are executed on-the-fly while the object is written to disk.
This uses a block-processing strategy so that the full object is not
realized at once in memory. Instead the object is processed block by block
i.e. the blocks are realized in memory and written to disk one at a time.
In other words, \code{writeHDF5Array(x, ...)} is semantically equivalent
to \code{writeHDF5Array(as.array(x), ...)}, except that \code{as.array(x)}
is not called because this would realize the full object at once in memory.
See \code{?\link{DelayedArray}} for general information about
\link{DelayedArray} objects.
}
\references{
Documentation of the H5 predefined datatypes on the HDF Group's Support
Portal: \url{https://portal.hdfgroup.org/display/HDF5/Predefined+Datatypes}
}
\seealso{
\itemize{
\item \link{HDF5Array} objects.
\item \code{\link{h5writeDimnames}} for writing the dimnames of an
HDF5 dataset to disk.
\item \code{\link{saveHDF5SummarizedExperiment}} and
\code{\link{loadHDF5SummarizedExperiment}} in this
package (the \pkg{HDF5Array} package) for saving/loading
an HDF5-based \link[SummarizedExperiment]{SummarizedExperiment}
object to/from disk.
\item \link{HDF5-dump-management} to control the location and
physical properties of automatically created HDF5 datasets.
\item \code{\link{h5ls}} to list the content of an HDF5 file.
}
}
\examples{
## ---------------------------------------------------------------------
## WRITE AN ORDINARY ARRAY TO AN HDF5 FILE
## ---------------------------------------------------------------------
m0 <- matrix(runif(364, min=-1), nrow=26,
dimnames=list(letters, LETTERS[1:14]))
h5file <- tempfile(fileext=".h5")
M1 <- writeHDF5Array(m0, h5file, name="M1", chunkdim=c(5, 5))
M1
chunkdim(M1)
## By default, writeHDF5Array() writes the dimnames to the HDF5 file:
dimnames(M1) # same as 'dimnames(m0)'
## Use 'with.dimnames=FALSE' to not write the dimnames to the file:
M1b <- writeHDF5Array(m0, h5file, name="M1b", with.dimnames=FALSE)
dimnames(M1b) # no dimnames
## With sparse data:
sm <- rsparsematrix(20, 8, density=0.1)
M2 <- writeHDF5Array(sm, h5file, name="M2", chunkdim=c(5, 5))
M2
is_sparse(M2) # TRUE
## ---------------------------------------------------------------------
## WRITE A DelayedArray OBJECT TO AN HDF5 FILE
## ---------------------------------------------------------------------
M3 <- log(t(DelayedArray(m0)) + 1)
M3 <- writeHDF5Array(M3, h5file, name="M3", chunkdim=c(5, 5))
M3
chunkdim(M3)
library(h5vcData)
tally_file <- system.file("extdata", "example.tally.hfs5",
package="h5vcData")
h5ls(tally_file)
cvg0 <- HDF5Array(tally_file, "/ExampleStudy/16/Coverages")
cvg1 <- cvg0[ , , 29000001:29000007]
writeHDF5Array(cvg1, h5file, "cvg1")
h5ls(h5file)
}
\keyword{methods}
|