File: H5File-class.Rd

package info (click to toggle)
r-bioc-hdf5array 1.34.0-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 8,736 kB
  • sloc: ansic: 5,815; makefile: 4
file content (198 lines) | stat: -rw-r--r-- 6,265 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
\name{H5File-class}
\docType{class}

\alias{class:H5FileID}
\alias{H5FileID-class}
\alias{H5FileID}

\alias{open.H5FileID}
\alias{close.H5FileID}
\alias{show,H5FileID-method}

\alias{class:H5File}
\alias{H5File-class}
\alias{H5File}

\alias{path,H5File-method}
\alias{open.H5File}
\alias{close.H5File}
\alias{show,H5File-method}
\alias{coerce,H5File,H5IdComponent-method}

\alias{class:character_OR_H5File}
\alias{character_OR_H5File-class}
\alias{character_OR_H5File}

\alias{class:H5DSetDescriptor}
\alias{H5DSetDescriptor-class}
\alias{H5DSetDescriptor}
\alias{destroy_H5DSetDescriptor}
\alias{show,H5DSetDescriptor-method}

\title{H5File objects}

\description{
  The H5File class provides a formal representation of an HDF5
  file (local or remote).
}

\usage{
## Constructor function:
H5File(filepath, s3=FALSE, s3credentials=NULL, .no_rhdf5_h5id=FALSE)
}

\arguments{
  \item{filepath}{
    A single string specifying the path or URL to an HDF5 file.
  }
  \item{s3}{
    \code{TRUE} or \code{FALSE}. Should the \code{filepath} argument be
    treated as the URL to a file stored in an Amazon S3 bucket, rather than
    the path to a local file?
  }
  \item{s3credentials}{
    A list of length 3, providing the credentials for accessing
    files stored in a private Amazon S3 bucket.
    See \code{?\link[rhdf5]{H5Pset_fapl_ros3}} in the \pkg{rhdf5} package
    for more information.
  }
  \item{.no_rhdf5_h5id}{
    For internal use only. Don't use.
  }
}

\details{
  IMPORTANT NOTE ABOUT H5File OBJECTS AND PARALLEL EVALUATION

  The short story is that H5File objects cannot be used in the context
  of parallel evaluation at the moment.

  Here is why:

  H5File objects contain an identifier to an open connection to the HDF5
  file. This identifier becomes invalid in the following two situations:
  \itemize{
    \item After serialization/deserialization, that is, after loading
          a serialized H5File object with \code{\link{readRDS}()} or
          \code{\link{load}()}.
    \item In the context of parallel evaluation, when using the
          \link[BiocParallel]{SnowParam} parallelization backend.
          This is because, unlike the \link[BiocParallel]{MulticoreParam}
          backend which uses a system \code{fork}, the
          \link[BiocParallel]{SnowParam} backend uses
          serialization/deserialization to transmit the object
          to the workers.
  }
  In both cases, the connection to the file is lost and any attempt to
  read data from the H5File object will fail. Note that the above also
  happens to any H5File object that gets serialized indirectly, i.e. as
  part of a bigger object. For example, if an \link{HDF5Array} object
  was constructed from an H5File object, then it contains the H5File
  object and therefore
  \code{\link[DelayedArray]{blockApply}(..., BPPARAM=SnowParam(4))}
  cannot be used on it.

  Furthermore, even if sometimes an H5File object \emph{seems} to work
  fine with the \link[BiocParallel]{MulticoreParam} parallelization
  backend, this is highly unreliable and must be avoided.
}

\value{
  An H5File object.
}

\seealso{
  \itemize{
    \item \link[rhdf5]{H5Pset_fapl_ros3} in the \pkg{rhdf5} package for
          detailed information about how to pass your S3 credentials
          to the \code{s3credentials} argument.

    \item The \link{HDF5Array} class for representing and operating on
          a conventional (a.k.a. dense) HDF5 dataset.

    \item The \link{H5SparseMatrix} class for representing and operating on
          an HDF5 sparse matrix.

    \item The \link{H5ADMatrix} class for representing and operating on
          the central matrix of an \code{h5ad} file, or any matrix in
          its \code{/layers} group.

    \item The \link{TENxMatrix} class for representing and operating on
          a 10x Genomics dataset.

    \item The \code{\link{h5mread}} function in this package (\pkg{HDF5Array})
          that is used internally by \link{HDF5Array}, \link{TENxMatrix},
          and \link{H5ADMatrix} objects, for (almost) all their data
          reading needs.

    \item \code{\link{h5ls}} to list the content of an HDF5 file.

    \item \code{\link[BiocParallel]{bplapply}},
          \code{\link[BiocParallel]{MulticoreParam}}, and
          \code{\link[BiocParallel]{SnowParam}}, in the
          \pkg{BiocParallel} package.
  }
}

\examples{
## ---------------------------------------------------------------------
## A. BASIC USAGE
## ---------------------------------------------------------------------

## With a local file:
## Locate the toy.h5 file found under extdata in the installed HDF5Array
## package, then wrap it in an H5File object.
toy_h5 <- system.file("extdata", "toy.h5", package="HDF5Array")
h5file1 <- H5File(toy_h5)
## List the content of the HDF5 file.
h5ls(h5file1)
## Presumably returns the path the H5File object was created from.
path(h5file1)

## Read from dataset M2 through the H5File object. The list supplies one
## index per dimension (here 1:10 and 1:6).
h5mread(h5file1, "M2", list(1:10, 1:6))
get_h5mread_returned_type(h5file1, "M2")

## With a file stored in an Amazon S3 bucket:
## NOTE(review): this example is skipped when sysname is Darwin (macOS);
## presumably S3 access is not available there -- confirm.
if (Sys.info()[["sysname"]] != "Darwin") {
  public_S3_url <-
   "https://rhdf5-public.s3.eu-central-1.amazonaws.com/rhdf5ex_t_float_3d.h5"
  ## s3=TRUE tells H5File to treat filepath as the URL to a file stored
  ## in an Amazon S3 bucket rather than the path to a local file.
  h5file2 <- H5File(public_S3_url, s3=TRUE)
  h5ls(h5file2)

  ## No index is supplied here, only the dataset name.
  h5mread(h5file2, "a1")
  get_h5mread_returned_type(h5file2, "a1")
}

## ---------------------------------------------------------------------
## B. H5File OBJECTS AND PARALLEL EVALUATION
## ---------------------------------------------------------------------
## H5File objects cannot be used in the context of parallel evaluation
## at the moment!

library(BiocParallel)

## Worker function for a dataset indexed along 2 dimensions: reads the
## selection given by i along the first dimension (NULL is passed for the
## second one) and returns the sum of the values read.
FUN1 <- function(i, h5file, name) {
    index <- list(i, NULL)
    block <- HDF5Array::h5mread(h5file, name, index)
    sum(block)
}

## Worker function for a dataset indexed along 3 dimensions: reads the
## selection given by i along the first dimension (NULL is passed for the
## other two) and returns the sum of the values read.
FUN2 <- function(i, h5file, name) {
    index <- list(i, NULL, NULL)
    block <- HDF5Array::h5mread(h5file, name, index)
    sum(block)
}

## With the SnowParam parallelization backend, the H5File object
## does NOT work on the workers:
## (SnowParam transmits the object to the workers via
## serialization/deserialization, which invalidates the identifier to
## the open file connection held by the H5File object. See Details.)
## Note that h5file2 only exists if the S3 example in section A was run,
## that is, when not on Darwin.
\dontrun{
## ERROR!
res1 <- bplapply(1:150, FUN1, h5file1, "M2", BPPARAM=SnowParam(3))
## ERROR!
res2 <- bplapply(1:5, FUN2, h5file2, "a1", BPPARAM=SnowParam(3))
}

## With the MulticoreParam parallelization backend, the H5File object
## might seem to work on the workers. However this is highly unreliable
## and must be avoided:
## NOTE(review): MulticoreParam relies on a system fork (see Details);
## the guard below presumably exists because forking is not available
## on Windows -- confirm.
\dontrun{
if (.Platform$OS.type != "windows") {
  ## UNRELIABLE!
  res1 <- bplapply(1:150, FUN1, h5file1, "M2", BPPARAM=MulticoreParam(3))
  ## UNRELIABLE!
  res2 <- bplapply(1:5, FUN2, h5file2, "a1", BPPARAM=MulticoreParam(3))
}
}
}
\keyword{classes}
\keyword{methods}