File: blockGrid.R

package info (click to toggle)
r-bioc-delayedarray 0.8.0%2Bdfsg-2
  • links: PTS, VCS
  • area: main
  • in suites: buster
  • size: 980 kB
  • sloc: ansic: 93; makefile: 2; sh: 1
file content (301 lines) | stat: -rw-r--r-- 10,924 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
### =========================================================================
### blockGrid() and family
### -------------------------------------------------------------------------
###


### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### set/getAutoBlockSize()
###
### The automatic block size must be specified in bytes.
###

### We set the automatic block size to 100 Mb by default.
set_auto.block.size <- function(size=1e8)
{
    set_user_option("auto.block.size", size)
}

setAutoBlockSize <- function(size=1e8)
{
    if (!isSingleNumber(size) || size < 1)
        stop(wmsg("the block size must be a single number >= 1"))
    prev_size <- get_user_option("auto.block.size")
    set_auto.block.size(size)
    message("automatic block size set to ", size, " bytes ",
            "(was ", prev_size, ")")
    invisible(size)
}

getAutoBlockSize <- function()
{
    size <- get_user_option("auto.block.size")
    if (!isSingleNumber(size) || size < 1)
        stop(wmsg("DelayedArray user-controlled global option ",
                  "auto.block.size should be a single number >= 1. ",
                  "Fix it with setAutoBlockSize()."))
    size
}


### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### getAutoBlockLength()
###

### The elements of a character vector or a list have a variable size.
### For a character vector: the minimum size of an element is 8 bytes which
### is the overhead of a CHARSXP object. This doesn't account for the string
### data itself.
### For a list: the minimum size of a list element is 8 bytes and is obtained
### when the element is a NULL. However, assuming that a list will typically
### contain more non-NULL than NULL elements and that the non-NULL elements
### will typically be atomic vectors, the average element size is more likely
### to be >= the overhead of an atomic vector which is 56 bytes.
get_type_size <- function(type)
{
    ### Atomic type sizes in bytes.
    TYPE_SIZES <- c(
        logical=4L,
        integer=4L,
        numeric=8L,
        double=8L,
        complex=16L,
        character=8L,  # overhead of a CHARSXP object
        raw=1L,
        list=56L       # overhead of an atomic vector
    )
    if (missing(type))
        return(TYPE_SIZES)
    if (is.factor(type)) {
        type <- as.character(type)
    } else if (!is.character(type)) {
        stop(wmsg("'type' must be a character vector or factor"))
    }
    if (any(type %in% ""))
        stop(wmsg("'type' cannot contain empty strings"))
    idx <- which(!(type %in% c(names(TYPE_SIZES), NA_character_)))
    if (length(idx) != 0L) {
        unsupported_types <- unique(type[idx])
        in1string <- paste0(unsupported_types, collapse=", ")
        stop(wmsg("unsupported type(s): ",  in1string))
    }
    TYPE_SIZES[type]
}

getAutoBlockLength <- function(type)
{
    if (missing(type))
        stop(wmsg("Please specify the type of the array data. ",
                  "See ?getAutoBlockLength"))
    if (!isSingleString(type))
        stop(wmsg("'type' must be a single string"))
    type_size <- get_type_size(type)
    block_size <- getAutoBlockSize()
    ans <- block_size / type_size
    if (ans > .Machine$integer.max)
        stop(wmsg("Automatic block length is too big. Blocks of ",
                  "length > .Machine$integer.max are not supported yet. ",
                  "Please reduce the automatic block length by reducing ",
                  "the automatic block size with setAutoBlockSize()."))
    max(as.integer(ans), 1L)
}


### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### set/getAutoBlockShape()
###

.SUPPORTED_SHAPES <- c("hypercube",
                       "scale",
                       "first-dim-grows-first",
                       "last-dim-grows-first")

### We set the automatic block shape to "hypercube" by default.
set_auto.block.shape <- function(shape="hypercube")
{
    set_user_option("auto.block.shape", shape)
}

setAutoBlockShape <- function(shape=c("hypercube",
                                      "scale",
                                      "first-dim-grows-first",
                                      "last-dim-grows-first"))
{
    shape <- match.arg(shape)
    prev_shape <- get_user_option("auto.block.shape")
    set_auto.block.shape(shape)
    message("automatic block shape set to \"", shape, "\" ",
             "(was \"", prev_shape, "\")")
    invisible(shape)
}

getAutoBlockShape <- function()
{
    shape <- get_user_option("auto.block.shape")
    if (!(isSingleString(shape) && shape %in% .SUPPORTED_SHAPES)) {
        in1string <- paste(paste0("\"", .SUPPORTED_SHAPES, "\""), collapse=", ")
        stop(wmsg("DelayedArray user-controlled global option ",
                  "auto.block.shape should be one of: ", in1string, ". ",
                  "Fix it with setAutoBlockShape()."))
    }
    shape
}


### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### blockGrid()
###

### Guaranteed to return an integer >= 1.
.normarg_block.length <- function(block.length, type)
{
    if (is.null(block.length))
        return(getAutoBlockLength(type))
    if (!isSingleNumber(block.length))
        stop(wmsg("'block.length' must be a single integer or NULL"))
    if (block.length < 1)
        stop(wmsg("'block.length' must be >= 1"))
    if (block.length > .Machine$integer.max)
        stop(wmsg("'block.length' is too big. Blocks of ",
                  "length > .Machine$integer.max are not supported yet. ",
                  "Please specify a smaller 'block.length'."))
    as.integer(block.length)
}

.normarg_chunk.grid <- function(chunk.grid, x)
{
    if (is.null(chunk.grid))
        return(chunkGrid(x))
    if (!is(chunk.grid, "ArrayGrid"))
        stop(wmsg("'chunk.grid' must be an ArrayGrid object or NULL"))
    if (!identical(refdim(chunk.grid), dim(x)))
        stop(wmsg("'chunk.grid' is incompatible with 'x'"))
    chunk.grid
}

.normarg_block.shape <- function(block.shape)
{
    if (is.null(block.shape))
        return(getAutoBlockShape())
    if (!(isSingleString(block.shape) && block.shape %in% .SUPPORTED_SHAPES)) {
        in1string <- paste(paste0("\"", .SUPPORTED_SHAPES, "\""), collapse=", ")
        stop(wmsg("'block.shape' must be one of ", in1string, ", or NULL"))
    }
    block.shape
}

### Return a grid that is "optimal" for block processing of array-like
### object 'x'.
### The grid is returned as an ArrayGrid object on reference array 'x'.
### The grid elements define the blocks that will be used for processing 'x'
### by block. The grid is "optimal" in the sense that:
###  - It's "compatible" with the chunk grid (i.e. with 'chunkGrid(x)' or
###    with the chunk grid supplied via the 'chunk.grid' argument), that is,
###    the chunks are contained in the blocks. In other words, chunks never
###    cross block boundaries.
###  - Its "resolution" is such that the blocks have a length that is as
###    close as possibe to (but does not exceed) 'block.length'.
###    An exception is when some chunks are already >= 'block.length',
###    in which case the returned grid is the same as the chunk grid.
### Note that the returned grid is regular (i.e. RegularArrayGrid object)
### unless the chunk grid is not regular (i.e. is an ArbitraryArrayGrid
### object).
blockGrid <- function(x, block.length=NULL, chunk.grid=NULL, block.shape=NULL)
{
    x_dim <- dim(x)
    if (is.null(x_dim))
        stop(wmsg("'x' must be an array-like object"))
    block_len <- .normarg_block.length(block.length, type(x))
    chunk_grid <- .normarg_chunk.grid(chunk.grid, x)
    block_shape <- .normarg_block.shape(block.shape)
    ## If 'x' is empty, we return a grid with a single (empty) block that
    ## has the dimensions of 'x'.
    if (any(x_dim == 0L))
        return(RegularArrayGrid(x_dim))
    if (is.null(chunk_grid)) {
        ans <- makeRegularArrayGridOfCappedLengthViewports(x_dim,
                                                           block_len,
                                                           block_shape)
        return(ans)
    }
    chunks_per_block <- max(block_len %/% maxlength(chunk_grid), 1L)
    ratio <- makeCappedVolumeBox(chunks_per_block, dim(chunk_grid), block_shape)
    downsample(chunk_grid, ratio)
}


### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### Two additional functions specific to the 2-dimensional case
###
### Both return a RegularArrayGrid object.
###

.get_auto_nrow <- function(x_dim, block.length, x_type)
{
    x_nrow <- x_dim[[1L]]
    x_ncol <- x_dim[[2L]]
    block_len <- .normarg_block.length(block.length, x_type)
    nrow <- block_len %/% x_ncol
    if (nrow < 1L)
        return(1L)
    if (nrow > x_nrow)
        return(x_nrow)
    nrow
}

### Define blocks of full rows.
rowGrid <- function(x, nrow=NULL, block.length=NULL)
{
    x_dim <- dim(x)
    if (length(x_dim) != 2L)
        stop(wmsg("'x' must have exactly 2 dimensions"))
    x_nrow <- x_dim[[1L]]
    x_ncol <- x_dim[[2L]]
    if (is.null(nrow)) {
        nrow <- .get_auto_nrow(x_dim, block.length, type(x))
        spacings <- c(nrow, x_ncol)
    } else {
        if (!is.null(block.length))
            warning("'block.length' is ignored when 'nrow' is not NULL")
        if (!isSingleNumber(nrow))
            stop(wmsg("'nrow' must be a single integer or NULL"))
        nrow <- as.integer(nrow)
        if (nrow < 1L || nrow > x_nrow)
            stop(wmsg("'nrow' must be >= 1 and <= nrow(x)"))
        spacings <- c(nrow, x_ncol)
        if (prod(spacings) > .Machine$integer.max)
            stop(wmsg("'nrow' is too big. Blocks of length > ",
                      ".Machine$integer.max are not supported yet. ",
                      "Please specify a smaller 'nrow'."))
    }
    RegularArrayGrid(x_dim, spacings)
}

### Define blocks of full columns.
colGrid <- function(x, ncol=NULL, block.length=NULL)
{
    x_dim <- dim(x)
    if (length(x_dim) != 2L)
        stop(wmsg("'x' must have exactly 2 dimensions"))
    x_nrow <- x_dim[[1L]]
    x_ncol <- x_dim[[2L]]
    if (is.null(ncol)) {
        ncol <- .get_auto_nrow(rev(x_dim), block.length, type(x))
        spacings <- c(x_nrow, ncol)
    } else {
        if (!is.null(block.length))
            warning("'block.length' is ignored when 'ncol' is not NULL")
        if (!isSingleNumber(ncol))
            stop(wmsg("'ncol' must be a single integer or NULL"))
        ncol <- as.integer(ncol)
        if (ncol < 1L || ncol > x_ncol)
            stop(wmsg("'ncol' must be >= 1 and <= ncol(x)"))
        spacings <- c(x_nrow, ncol)
        if (prod(spacings) > .Machine$integer.max)
            stop(wmsg("'ncol' is too big. Blocks of length > ",
                      ".Machine$integer.max are not supported yet. ",
                      "Please specify a smaller 'ncol'."))
    }
    RegularArrayGrid(x_dim, spacings)
}