1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144
|
\name{writeTENxMatrix}
\alias{class:TENxRealizationSink}
\alias{TENxRealizationSink-class}
\alias{TENxRealizationSink}
\alias{dimnames,TENxRealizationSink-method}
\alias{type,TENxRealizationSink-method}
\alias{chunkdim,TENxRealizationSink-method}
\alias{write_block,TENxRealizationSink-method}
\alias{close,TENxRealizationSink-method}
\alias{coerce,TENxRealizationSink,TENxMatrixSeed-method}
\alias{coerce,TENxRealizationSink,TENxMatrix-method}
\alias{coerce,TENxRealizationSink,DelayedArray-method}
\alias{writeTENxMatrix}
\alias{coerce,ANY,TENxMatrix-method}
\alias{coerce,DelayedArray,TENxMatrix-method}
\alias{coerce,DelayedMatrix,TENxMatrix-method}
\title{Write a matrix-like object as an HDF5-based sparse matrix}
\description{
The 1.3 Million Brain Cell Dataset and other datasets published by
10x Genomics use an HDF5-based sparse matrix representation instead
of the conventional (a.k.a. dense) HDF5 representation.
\code{writeTENxMatrix} writes a matrix-like object to this format.
IMPORTANT NOTE: Only use \code{writeTENxMatrix} if the matrix-like
object to write is sparse, that is, if most of its elements are zero.
Using \code{writeTENxMatrix} on dense data is very inefficient!
For dense data, use \code{\link{writeHDF5Array}} instead.
}
\usage{
writeTENxMatrix(x, filepath=NULL, group=NULL, level=NULL, verbose=NA)
}
\arguments{
\item{x}{
The matrix-like object to write to an HDF5 file.
The object to write should typically be sparse, that is, most of its
elements should be zero.
If \code{x} is a \link{DelayedMatrix} object, \code{writeTENxMatrix}
\emph{realizes} it on disk, that is, all the delayed operations carried
by the object are executed while the object is written to disk.
}
\item{filepath}{
\code{NULL} or the path (as a single string) to the (new or existing)
HDF5 file to write the data to.
If \code{NULL}, then the data will be written to the current \emph{HDF5
dump file}, i.e. to the file whose path is returned by
\code{\link{getHDF5DumpFile}}.
}
\item{group}{
\code{NULL} or the name of the HDF5 group to write the data to.
If \code{NULL}, then the name returned by \code{\link{getHDF5DumpName}}
will be used.
}
\item{level}{
The compression level to use for writing the data to disk.
By default (\code{NULL}), the value returned by
\code{getHDF5DumpCompressionLevel()} will be used.
See \code{?\link{getHDF5DumpCompressionLevel}} for more information.
}
\item{verbose}{
Whether block processing progress should be displayed or not.
If set to \code{NA} (the default), verbosity is controlled
by \code{DelayedArray:::get_verbose_block_processing()}.
Setting \code{verbose} to \code{TRUE} or \code{FALSE} overrides this.
}
}
\details{
Please note that, depending on the size of the data to write to disk
and the performance of the disk, \code{writeTENxMatrix} can take a long
time to complete. Use \code{verbose=TRUE} to see its progress.
Use \code{\link{setHDF5DumpFile}} and \code{\link{setHDF5DumpName}} to
control the location of automatically created HDF5 datasets.
}
\value{
A \link{TENxMatrix} object pointing to the newly written HDF5 data on disk.
}
\seealso{
\itemize{
\item \link{TENxMatrix} objects.
\item The \code{\link[TENxBrainData]{TENxBrainData}} dataset (in the
\pkg{TENxBrainData} package).
\item \link{HDF5-dump-management} to control the location and
physical properties of automatically created HDF5 datasets.
\item \code{\link{h5ls}} to list the content of an HDF5 file.
}
}
\examples{
## ---------------------------------------------------------------------
## A SIMPLE EXAMPLE
## ---------------------------------------------------------------------
## Start from a 25 x 12 integer matrix filled with zeros, with letters as
## row names and capital letters as column names:
m0 <- matrix(0L, nrow=25, ncol=12,
dimnames=list(letters[1:25], LETTERS[1:12]))
## Set exactly one cell in each of rows 2 to 24 (23 cells in total) to a
## random integer between 101 and 155. All the other elements remain
## zero, so the matrix is sparse:
m0[cbind(2:24, c(12:1, 2:12))] <- 100L + sample(55L, 23, replace=TRUE)
## Write the matrix to group "m0" of a new temporary file:
out_file <- tempfile()
M0 <- writeTENxMatrix(m0, out_file, group="m0")
M0
sparsity(M0)
path(M0) # same as 'out_file'
## Use h5ls() to list the content of this HDF5 file:
h5ls(path(M0))
## ---------------------------------------------------------------------
## USING THE "1.3 Million Brain Cell Dataset"
## ---------------------------------------------------------------------
## The 1.3 Million Brain Cell Dataset from 10x Genomics is available via
## ExperimentHub:
library(ExperimentHub)
hub <- ExperimentHub()
query(hub, "TENxBrainData")
## 'fname' is used below as the path to the HDF5 file that contains the
## "mm10" group:
fname <- hub[["EH1039"]]
oneM <- TENxMatrix(fname, group="mm10") # see ?TENxMatrix for the details
oneM
## Note that the following transformation preserves sparsity:
M2 <- log(oneM + 1) # delayed
M2 # a DelayedMatrix instance
## In order to reduce computation times, we'll write only the first
## 5000 columns of M2 to disk:
## (the delayed operations carried by M2 are executed while writing, and
## verbose=TRUE displays the block processing progress)
out_file <- tempfile()
M3 <- writeTENxMatrix(M2[ , 1:5000], out_file, group="mm10", verbose=TRUE)
M3 # a TENxMatrix instance
}
\keyword{methods}
|