1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163
|
### =========================================================================
### randomSparseArray() and poissonSparseArray()
### -------------------------------------------------------------------------
### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### randomSparseArray()
###
### Returns an SVT_SparseArray object of type "double".
randomSparseArray <- function(dim, density=0.05, dimnames=NULL)
{
if (!is.numeric(dim))
stop(wmsg("'dim' must be an integer vector"))
if (!is.integer(dim))
dim <- as.integer(dim)
if (!isSingleNumber(density) || density < 0 || density > 1)
stop(wmsg("'density' must be a number >= 0 and <= 1"))
## Start with an empty sparse array.
ans <- new_SVT_SparseArray(dim, dimnames=dimnames, type="double")
## Add the nonzero values to it.
ans_len <- length(ans)
nzcount <- as.integer(ans_len * density)
Lindex <- sample.int(ans_len, nzcount)
nzvals <- signif(rnorm(nzcount), 2)
if (nzcount <= .Machine$integer.max) {
## Using an Mindex seems to be slightly faster (4%-5%) than using an
## Lindex but we can only do this when the resulting Mindex matrix
## has < 2^31 rows.
ans[Lindex2Mindex(Lindex, dim(ans))] <- nzvals
} else {
ans[Lindex] <- nzvals
}
ans
}
randomSparseMatrix <- function(nrow=1L, ncol=1L, density=0.05, dimnames=NULL)
{
if (!isSingleNumber(nrow) || !isSingleNumber(ncol))
stop(wmsg("'nrow' and 'ncol' must be single integers"))
randomSparseArray(c(nrow, ncol), density=density, dimnames=dimnames)
}
### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### poissonSparseArray()
###
### Like stats::rpois() but slightly faster and implementation is much
### simpler. Only for 0 <= 'lambda' <= 4.
### NOT exported.
simple_rpois <- function(n, lambda)
SparseArray.Call("C_simple_rpois", n, lambda)
### Returns an SVT_SparseArray object of type "integer".
### Density of the returned object is expected to be about '1 - exp(-lambda)'.
### Default for 'lambda' is set to -log(0.95) which should produce an object
### with an expected density of 0.05.
poissonSparseArray <- function(dim, lambda=-log(0.95), density=NA,
dimnames=NULL)
{
dim <- S4Arrays:::normarg_dim(dim)
if (!missing(lambda) && !identical(density, NA))
stop(wmsg("only one of 'lambda' and 'density' can be specified"))
if (!missing(lambda)) {
if (!isSingleNumber(lambda) || lambda < 0)
stop(wmsg("'lambda' must be a non-negative number"))
} else if (!identical(density, NA)) {
if (!isSingleNumber(density) || density < 0 || density >= 1)
stop(wmsg("'density' must be a number >= 0 and < 1"))
lambda <- -log(1 - density)
}
ans_SVT <- SparseArray.Call("C_poissonSparseArray", dim, lambda)
new_SVT_SparseArray(dim, dimnames=dimnames, type="integer", SVT=ans_SVT,
check=FALSE)
}
### Replacement for rpois() when 'n' is big and 'lambda' is small.
### For example:
### .sparse_rpois(3e9, 0.005) # takes about 1 min. and uses < 1G of RAM
### rpois(3e9, 0.005) # takes about 55 sec. and uses 12G of RAM
.sparse_rpois <- function(n, lambda, chunksize=5e6L)
{
if (n == 0L)
return(list(integer(0), integer(0)))
nzidx_env <- new.env(parent=emptyenv())
nzvals_env <- new.env(parent=emptyenv())
offset <- 0 # double to avoid integer overflow when n >= 2^31
k <- 1L
while (offset < n) {
nn <- n - offset
if (nn > chunksize)
nn <- chunksize
vals <- rpois(nn, lambda)
nzidx <- nzwhich(vals)
key <- sprintf("%04d", k)
assign(key, offset + nzidx, envir=nzidx_env)
assign(key, vals[nzidx], envir=nzvals_env)
offset <- offset + nn
k <- k + 1L
}
nzidx <- as.list(nzidx_env, all.names=TRUE, sorted=TRUE)
nzidx <- unlist(nzidx, recursive=FALSE, use.names=FALSE)
nzvals <- as.list(nzvals_env, all.names=TRUE, sorted=TRUE)
nzvals <- unlist(nzvals, recursive=FALSE, use.names=FALSE)
list(nzidx, nzvals)
}
### Solution based on .sparse_rpois(). Equivalent to poissonSparseArray()
### but slower and uses more memory e.g.
###
### poissonSparseArray2(c(1e5, 2e4), density=0.02)
###
### is about 3x slower uses about 2.5x more memory than
###
### poissonSparseArray(c(1e5, 2e4), density=0.02)
###
### NOT exported
poissonSparseArray2 <- function(dim, lambda=-log(0.95), density=NA)
{
dim <- S4Arrays:::normarg_dim(dim)
if (!missing(lambda) && !identical(density, NA))
stop(wmsg("only one of 'lambda' and 'density' can be specified"))
if (!missing(lambda)) {
if (!isSingleNumber(lambda) || lambda < 0)
stop(wmsg("'lambda' must be a non-negative number"))
} else {
if (!isSingleNumber(density) || density < 0 || density >= 1)
stop(wmsg("'density' must be a number >= 0 and < 1"))
lambda <- -log(1 - density)
}
## Start with an empty sparse array.
ans <- new_SVT_SparseArray(dim, type="integer")
## Add the nonzero values to it.
ans_len <- length(ans)
srp <- .sparse_rpois(ans_len, lambda)
ans[srp[[1L]]] <- srp[[2L]]
ans
}
poissonSparseMatrix <- function(nrow=1L, ncol=1L, lambda=-log(0.95), density=NA,
dimnames=NULL)
{
if (!isSingleNumber(nrow) || !isSingleNumber(ncol))
stop(wmsg("'nrow' and 'ncol' must be single integers"))
if (missing(lambda)) {
poissonSparseArray(c(nrow, ncol), density=density,
dimnames=dimnames)
} else {
poissonSparseArray(c(nrow, ncol), lambda=lambda, density=density,
dimnames=dimnames)
}
}
|