File: writeTENxMatrix.Rd

package info (click to toggle)
r-bioc-hdf5array 1.34.0-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 8,736 kB
  • sloc: ansic: 5,815; makefile: 4
file content (144 lines) | stat: -rw-r--r-- 4,988 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
\name{writeTENxMatrix}

\alias{class:TENxRealizationSink}
\alias{TENxRealizationSink-class}
\alias{TENxRealizationSink}

\alias{dimnames,TENxRealizationSink-method}
\alias{type,TENxRealizationSink-method}
\alias{chunkdim,TENxRealizationSink-method}
\alias{write_block,TENxRealizationSink-method}
\alias{close,TENxRealizationSink-method}

\alias{coerce,TENxRealizationSink,TENxMatrixSeed-method}
\alias{coerce,TENxRealizationSink,TENxMatrix-method}
\alias{coerce,TENxRealizationSink,DelayedArray-method}

\alias{writeTENxMatrix}

\alias{coerce,ANY,TENxMatrix-method}
\alias{coerce,DelayedArray,TENxMatrix-method}
\alias{coerce,DelayedMatrix,TENxMatrix-method}

\title{Write a matrix-like object as an HDF5-based sparse matrix}

\description{
  The 1.3 Million Brain Cell Dataset and other datasets published by
  10x Genomics use an HDF5-based sparse matrix representation instead
  of the conventional (a.k.a. dense) HDF5 representation.

  \code{writeTENxMatrix} writes a matrix-like object to this format.

  IMPORTANT NOTE: Only use \code{writeTENxMatrix} if the matrix-like
  object to write is sparse, that is, if most of its elements are zero.
  Using \code{writeTENxMatrix} on dense data is very inefficient!
  For dense data, you should use \code{\link{writeHDF5Array}} instead.
}

\usage{
writeTENxMatrix(x, filepath=NULL, group=NULL, level=NULL, verbose=NA)
}

\arguments{
  \item{x}{
    The matrix-like object to write to an HDF5 file.

    The object to write should typically be sparse, that is, most of its
    elements should be zero.

    If \code{x} is a \link{DelayedMatrix} object, \code{writeTENxMatrix}
    \emph{realizes} it on disk, that is, all the delayed operations carried
    by the object are executed while the object is written to disk.
  }
  \item{filepath}{
    \code{NULL} or the path (as a single string) to the (new or existing)
    HDF5 file to which the data will be written.
    If \code{NULL}, then the data will be written to the current \emph{HDF5
    dump file}, i.e., to the file whose path is returned by
    \code{\link{getHDF5DumpFile}()}.
  }
  \item{group}{
    \code{NULL} or the name of the HDF5 group where to write the data.
    If \code{NULL}, then the name returned by \code{\link{getHDF5DumpName}}
    will be used.
  }
  \item{level}{
    The compression level to use for writing the data to disk.
    If \code{NULL} (the default), the level returned by
    \code{getHDF5DumpCompressionLevel()} will be used.
    See \code{?\link{getHDF5DumpCompressionLevel}} for more information.
  }
  \item{verbose}{
    Whether block processing progress should be displayed or not.
    If set to \code{NA} (the default), verbosity is controlled
    by \code{DelayedArray:::get_verbose_block_processing()}.
    Setting \code{verbose} to \code{TRUE} or \code{FALSE} overrides this.
  }
}

\details{
  Please note that, depending on the size of the data to write to disk
  and the performance of the disk, \code{writeTENxMatrix} can take a long
  time to complete. Use \code{verbose=TRUE} to see its progress.

  Use \code{\link{setHDF5DumpFile}} and \code{\link{setHDF5DumpName}} to
  control the location of automatically created HDF5 datasets.
}

\value{
  A \link{TENxMatrix} object pointing to the newly written HDF5 data on disk.
}

\seealso{
  \itemize{
    \item \link{TENxMatrix} objects.

    \item The \code{\link[TENxBrainData]{TENxBrainData}} dataset (in the
          \pkg{TENxBrainData} package).

    \item \link{HDF5-dump-management} to control the location and
          physical properties of automatically created HDF5 datasets.

    \item \code{\link{h5ls}} to list the content of an HDF5 file.
  }
}

\examples{
## ---------------------------------------------------------------------
## A SIMPLE EXAMPLE
## ---------------------------------------------------------------------
m0 <- matrix(0L, nrow=25, ncol=12,
             dimnames=list(letters[1:25], LETTERS[1:12]))
m0[cbind(2:24, c(12:1, 2:12))] <- 100L + sample(55L, 23, replace=TRUE)
out_file <- tempfile()
M0 <- writeTENxMatrix(m0, out_file, group="m0")
M0
sparsity(M0)

path(M0)  # same as 'out_file'

## Use h5ls() to list the content of this HDF5 file:
h5ls(path(M0))

## ---------------------------------------------------------------------
## USING THE "1.3 Million Brain Cell Dataset"
## ---------------------------------------------------------------------

## The 1.3 Million Brain Cell Dataset from 10x Genomics is available via
## ExperimentHub:
library(ExperimentHub)
hub <- ExperimentHub()
query(hub, "TENxBrainData")
fname <- hub[["EH1039"]]
oneM <- TENxMatrix(fname, group="mm10")  # see ?TENxMatrix for the details
oneM

## Note that the following transformation preserves sparsity:
M2 <- log(oneM + 1)  # delayed
M2                   # a DelayedMatrix instance

## In order to reduce computation times, we'll write only the first
## 5000 columns of M2 to disk:
out_file <- tempfile()
M3 <- writeTENxMatrix(M2[ , 1:5000], out_file, group="mm10", verbose=TRUE)
M3                   # a TENxMatrix instance
}
\keyword{methods}