1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398
|
### =========================================================================
### Bind multidimensional arrays along an arbitrary dimension
### -------------------------------------------------------------------------
.normarg_along <- function(along)
{
if (!isSingleNumber(along))
stop(wmsg("'along' must be a single number"))
if (!is.integer(along))
along <- as.integer(along)
if (along <= 0L)
stop(wmsg("'along' must be a positive integer"))
along
}
### Return a matrix with one row per dim and one column per object if the
### objects are "bindable". Otherwise return a string describing why they
### are not. This design allows the function to be used in the context of
### a validity method.
get_dims_to_bind <- function(objects, along)
{
stopifnot(is.list(objects))
along <- .normarg_along(along)
dims <- lapply(objects, dim)
ndims <- lengths(dims)
if (any(ndims < along))
stop(wmsg("the array-like objects to bind must have at least ",
along, " dimensions for this binding operation"))
ndim <- ndims[[1L]]
if (!all(ndims == ndim))
return(paste0("all the objects to bind must have ",
"the same number of dimensions"))
tmp <- unlist(dims, use.names=FALSE)
if (is.null(tmp))
return("the objects to bind have no dimensions")
dims <- matrix(tmp, nrow=ndim)
tmp <- dims[-along, , drop=FALSE]
if (!all(tmp == tmp[ , 1L]))
return("the objects to bind have incompatible dimensions")
dims
}
### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### combine_dims_along() and combine_dimnames_along()
###
### Compute the expected dims and dimnames of abind()'s result.
### Both combine_dims_along() and combine_dimnames_along() generalize what
### rbind/cbind do in the 2D case to the multidimensional case.
###
### A NOTE ABOUT CRAN PACKAGE abind 1.4-5: By default, abind::abind() does
### NOT follow the same rules as rbind() and cbind() for propagation of the
### dimnames. This is despite its man page (?abind::abind) claiming that it
### does (see documentation of the 'use.first.dimnames' argument), when in
### fact it has it backward! Very misleading!
### The abind0() function defined below in this file is a simple wrapper
### around abind::abind() that fixes that.
### Combine the dims the rbind/cbind way.
combine_dims_along <- function(dims, along)
{
stopifnot(is.matrix(dims))
along <- .normarg_along(along)
stopifnot(along <= nrow(dims))
ans_dim <- dims[ , 1L]
ans_dim[[along]] <- sum(dims[along, ])
ans_dim
}
### Assume all the arrays in 'objects' have the same number of dimensions.
.combine_dimnames <- function(objects)
{
lapply(seq_along(dim(objects[[1L]])),
function(n) {
for (object in objects) {
dn <- dimnames(object)[[n]]
if (!is.null(dn))
return(dn)
}
NULL
})
}
### Combine the dimnames the rbind/cbind way.
combine_dimnames_along <- function(objects, dims, along)
{
stopifnot(is.matrix(dims),
isSingleInteger(along), along >= 1L, along <= nrow(dims))
dimnames <- .combine_dimnames(objects)
along_names <- lapply(objects, function(object) dimnames(object)[[along]])
along_names_lens <- lengths(along_names)
if (any(along_names_lens != 0L)) {
fix_idx <- which(along_names_lens != dims[along, ])
along_names[fix_idx] <- lapply(dims[along, fix_idx], character)
}
along_names <- unlist(along_names, use.names=FALSE)
if (!is.null(along_names))
dimnames[[along]] <- along_names
simplify_NULL_dimnames(dimnames)
}
### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### simple_abind()
###
### 'objects' is assumed to be a list of vector-like objects.
### 'nblock' is assumed to be a single integer value (stored as a numeric)
### that is a common divisor of the object lengths.
.intertwine_blocks <- function(objects, nblock, ans_dim)
{
x0 <- unlist(lapply(objects, `[`, 0L), recursive=FALSE, use.names=FALSE)
objects_lens <- lengths(objects)
if (all(objects_lens == 0L))
return(set_dim(x0, ans_dim))
idx <- which(vapply(objects,
function(object) { typeof(object) != typeof(x0) },
logical(1),
USE.NAMES=FALSE))
if (length(idx) != 0L)
objects[idx] <- lapply(objects[idx], `storage.mode<-`, typeof(x0))
.Call2("C_abind", objects, nblock, ans_dim, PACKAGE="S4Arrays")
}
### A stripped-down version of abind::abind().
### Some differences:
### (a) Treatment of dimnames: simple_abind() treatment of dimnames is
### consistent with base::rbind() and base::cbind(). This is not the
### case for abind::abind() which does some strange things with the
### dimnames.
### (b) Performance: simple_abind() is much faster than abind::abind()
### (between 3x and 15x). Also note that in the 'along=1L' and 'along=2L'
### cases, it's generally as fast (and most of the time faster) than
### base::rbind() and base::cbind().
### For example, with 'm <- matrix(1:30000000, nrow=5000)',
### 'simple_abind(m, m, m, along=1L)' is 14x faster than
### 'abind::abind(m, m, m, along=1L)' and 11x faster than
### 'base::rbind(m, m, m)'.
### (c) abind::abind() is broken on matrices of type "list".
simple_abind <- function(..., along)
{
along <- .normarg_along(along)
objects <- S4Vectors:::delete_NULLs(list(...))
if (length(objects) == 0L)
return(NULL)
## Check dim compatibility.
dims <- get_dims_to_bind(objects, along)
if (is.character(dims))
stop(wmsg(dims))
if (length(objects) == 1L)
return(objects[[1L]])
## Perform the binding.
nblock <- prod(dims[-seq_len(along), 1L]) # numeric that can be >
# .Machine$integer.max
ans <- .intertwine_blocks(objects, nblock, combine_dims_along(dims, along))
## Combine and set the dimnames.
set_dimnames(ans, combine_dimnames_along(objects, dims, along))
}
### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### simple_abind2()
###
### A wrapper to simple_abind() that adds some of the capabilities originally
### found in abind::abind():
### 1. Not all arrays (supplied via 'objects') are required to have the same
### number of dimensions. If N is the number of dimensions of the arrays
### with the most dimensions, then artifical (a.k.a. ineffective) dimensions
### are added to the arrays with less dimensions so that they also have
### N dimensions.
### 2. 'along' can be any integer value between 1 and N+1. When set to N+1,
### then one more artifical dimension is added to all arrays.
### 3. simple_abind2() supports the 'rev.along' argument which can be any
### value between 0 and N.
### 4. Arguments 'along' and 'rev.along' can be omitted, in which case binding
### happens along the N-th dimension.
.normarg_rev_along <- function(rev.along)
{
if (!isSingleNumber(rev.along))
stop(wmsg("'rev.along' must be a single number"))
if (!is.integer(rev.along))
rev.along <- as.integer(rev.along)
if (rev.along < 0L)
stop(wmsg("'rev.along' must be a non-negative integer"))
rev.along
}
### To be re-used by other abind() methods e.g. by the method for SparseArray
### objects.
get_along <- function(N, along=NULL, rev.along=NULL)
{
if (is.null(along) && is.null(rev.along))
return(N)
if (is.null(rev.along)) {
along <- .normarg_along(along)
if (along > N + 1L)
stop(wmsg("'along' must be <= N + 1, where N is the number of ",
"dimensions of the arrays with the most dimensions"))
} else {
rev.along <- .normarg_rev_along(rev.along)
if (rev.along > N)
stop(wmsg("'rev.along' must be <= N, where N is the number of ",
"dimensions of the arrays with the most dimensions"))
along <- N + 1L - rev.along
}
along
}
### 'ndim' is expected to be >= number of dimensions of the arrays with
### the most dimensions.
### To be re-used by other abind() methods e.g. by the method for SparseArray
### objects.
add_missing_dims <- function(objects, ndim)
{
stopifnot(is.list(objects), isSingleInteger(ndim))
lapply(objects,
function(object) {
object_dim <- dim(object)
object_ndim <- length(object_dim)
if (object_ndim >= ndim)
return(object)
set_dim(object, c(object_dim, rep.int(1L, ndim - object_ndim)))
}
)
}
simple_abind2 <- function(objects, along=NULL, rev.along=NULL)
{
stopifnot(is.list(objects))
objects <- S4Vectors:::delete_NULLs(objects)
if (length(objects) == 0L)
return(NULL)
ndims <- vapply(objects, function(object) length(dim(object)), integer(1))
N <- max(ndims)
along <- get_along(N, along=along, rev.along=rev.along)
ans_ndim <- max(N, along)
objects <- add_missing_dims(objects, ans_ndim)
do.call(simple_abind, c(objects, list(along=along)))
}
### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### abind0()
###
### A simple wrapper around abind::abind() that uses 'use.first.dimnames=TRUE'
### by default to correct mishandling of the dimnames.
### See "A NOTE ABOUT CRAN PACKAGE abind 1.4-5" above in this file.
abind0 <- function(..., use.first.dimnames=TRUE)
{
abind::abind(..., use.first.dimnames=use.first.dimnames)
}
### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### The abind() generic and default method
###
### Like in the original abind::abind(), the default for 'along' is the last
### dimension. However, here in the generic function, the default value is
### NULL instead of 'N', but it means the same thing.
setGeneric("abind", signature="...",
function(..., along=NULL, rev.along=NULL) standardGeneric("abind")
)
### All argument names of abind::abind(), ignoring the ellipsis.
.ORGINAL_ABIND_ARGNAMES <- setdiff(names(formals(abind::abind)), "...")
### Return the supplied objects in an ordinary list. Any argument that is
### not a "recognized" argument of the original abind::abind() is considered
### an object.
.extract_objects <- function(...)
{
objects <- list(...)
objects[.ORGINAL_ABIND_ARGNAMES] <- NULL
unname(S4Vectors:::delete_NULLs(objects))
}
### Return the list of supplied arguments that belong to the original
### abind::abind() interface.
.extract_crazy_args <- function(...)
{
dots <- list(...)
dots[intersect(names(dots), .ORGINAL_ABIND_ARGNAMES)]
}
### One object in 'objects' **must** be an Array derivative, but not all!
### Try to re-dispatch on an existing abind() method defined for some Array
### derivative. If such method is found, then it should return an Array
### derivative.
.abind_as_Array <- function(objects, is_Array, is_array,
along=NULL, rev.along=NULL)
{
stopifnot(is.list(objects),
is.logical(is_Array), length(is_Array) == length(objects),
is.logical(is_array), length(is_array) == length(objects))
if (!all(is_Array | is_array))
stop(wmsg("all objects passed to abind() must be array-like ",
"objects when some of them are Array derivatives"))
## Avoid infinite recursion if re-dispatch fails to find an abind()
## method defined for some Array derivatives.
if (all(is_Array)) {
x1 <- objects[[1L]]
stop(wmsg("no abind() method found for ", class(x1)[[1L]], " objects"))
}
## Coerce all arrays or matrices to the class of the first Array
## derivative found in 'objects'.
x1 <- objects[[which(is_Array)[[1L]]]]
objects[is_array] <- lapply(objects[is_array], S4Vectors:::coerce2, x1)
## Call the abind() generic in the hope that method dispatch will find
## a method defined for objects of class 'class(x1)'.
do.call(S4Arrays::abind, c(objects, list(along=along, rev.along=rev.along)))
}
### .default_abind() tries to dispatch on the following functions, in this
### order:
### 1. Dispatch on the abind() generic: If the objects to bind are supplied
### in a list then .default_abind() will dispatch on the abind() generic.
### However this time the objects are passed to abind() as individual
### arguments. This gives the abind() generic the opportunity to dispatch
### on the appropriate S4 method (e.g. on the abind() method for SparseArray
### objects if all the objects are SparseArray derivatives). If no specific
### S4 method is found then .default_abind() will be called again but note
### that infinite recursion is naturally avoided.
### 2. Dispatch on .abind_as_Array(): If at least one of the objects to
### bind is an Array derivative then .default_abind() will dispatch
### on .abind_as_Array().
### 3. Dispatch on simple_abind2(): When all the objects to bind are ordinary
### arrays (including matrices) and no "crazy arguments" are supplied,
### then .default_abind() will dispatch on simple_abind2(). Note that
### a "crazy argument" is any formal argument located after the ellipsis
### in abind::abind() except 'along' and 'rev.along'.
### 4. Finally, if all the above fails, then dispatch on abind::abind()
### wrapper abind0().
.default_abind <- function(..., along=NULL, rev.along=NULL)
{
objects <- .extract_objects(...)
if (length(objects) == 0L)
return(NULL)
crazy_args <- .extract_crazy_args(...)
all_extra_args <- c(list(along=along, rev.along=rev.along), crazy_args)
ok1 <- vapply(objects, is.list, logical(1))
ok2 <- vapply(objects, is.array, logical(1))
ok3 <- vapply(objects, is, logical(1), "Array")
if (any(ok1 & !(ok2 | ok3))) {
## 1. Dispatch on the abind() generic.
if (length(objects) != 1L)
stop(wmsg("when supplying a list, all the objects ",
"to bind must be supplied via the list"))
x <- objects[[1L]]
return(do.call(S4Arrays::abind, c(x, all_extra_args)))
}
if (any(ok3)) {
## 2. Dispatch on .abind_as_Array().
if (length(crazy_args) != 0L) {
argnames <- paste0("'", names(crazy_args), "'", collapse=",")
stop(wmsg("unsupported argument(s) when calling abind() ",
"on Array derivatives: ", argnames))
}
return(.abind_as_Array(objects, ok3, ok2,
along=along, rev.along=rev.along))
}
if (all(ok2) && length(crazy_args) == 0L) {
## 3. Dispatch on simple_abind2(), which is significantly faster than
## abind::abind().
return(simple_abind2(objects, along=along, rev.along=rev.along))
}
## 4. Dispatch on abind::abind() wrapper abind0().
all_extra_args <- S4Vectors:::delete_NULLs(all_extra_args)
do.call(abind0, c(objects, all_extra_args))
}
setMethod("abind", "ANY", .default_abind)
### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### Bind arrays along their 1st or 2nd dimension
###
### TODO: No need for these functions to be generics. They should just be
### simple wrappers to 'abind(..., along=1L)' and 'abind(..., along=2L)',
### respectively.
setGeneric("arbind", function(...) standardGeneric("arbind"))
setGeneric("acbind", function(...) standardGeneric("acbind"))
setMethod("arbind", "ANY", function(...) abind(..., along=1L))
setMethod("acbind", "ANY", function(...) abind(..., along=2L))
|