\name{randomSparseArray}
\alias{randomSparseArray}
\alias{randomSparseMatrix}
\alias{poissonSparseArray}
\alias{poissonSparseMatrix}
\title{Random SparseArray object}
\description{
\code{randomSparseArray()} and \code{poissonSparseArray()} can be used
to generate a random \link{SparseArray} object efficiently.
}
\usage{
randomSparseArray(dim, density=0.05, dimnames=NULL)
poissonSparseArray(dim, lambda=-log(0.95), density=NA, dimnames=NULL)
## Convenience wrappers for the 2D case:
randomSparseMatrix(nrow, ncol, density=0.05, dimnames=NULL)
poissonSparseMatrix(nrow, ncol, lambda=-log(0.95), density=NA,
dimnames=NULL)
}
\arguments{
\item{dim}{
The dimensions (specified as an integer vector) of the \link{SparseArray}
object to generate.
}
\item{density}{
The desired density (specified as a number >= 0 and <= 1) of the
\link{SparseArray} object to generate, that is, the ratio between its
number of nonzero elements and its total number of elements.
This is \code{nzcount(x)/length(x)} or \code{1 - sparsity(x)}.
Note that, for \code{poissonSparseArray()} and \code{poissonSparseMatrix()},
\code{density} must be < 1, and the \emph{actual} density of the returned
object won't be exactly as requested but will typically be very close.
}
\item{dimnames}{
The \emph{dimnames} to put on the object to generate. Must be \code{NULL}
or a list whose length equals the number of dimensions. Each list element
must be either \code{NULL} or a character vector whose length matches the
extent of the corresponding dimension.
}
\item{lambda}{
The mean of the Poisson distribution. Passed internally to the calls
to \code{\link[stats]{rpois}()}.
Only one of \code{lambda} and \code{density} can be specified.
When \code{density} is specified, \code{rpois()} is called internally
with \code{lambda} set to \code{-log(1 - density)}. Since a Poisson
variable is nonzero with probability \code{1 - exp(-lambda)}, this is
expected to generate Poisson data with the requested density.
Finally, note that the default value for \code{lambda} corresponds to
a requested density of 0.05.
}
\item{nrow, ncol}{
Number of rows and columns of the \link{SparseMatrix} object to generate.
}
}
\details{
\code{randomSparseArray()} mimics the \code{\link[Matrix]{rsparsematrix}()}
function from the \pkg{Matrix} package but returns a \link{SparseArray}
object instead of a dgCMatrix object.
\code{poissonSparseArray()} populates a \link{SparseArray} object with
Poisson data, i.e., it is equivalent to:
\preformatted{ a <- array(rpois(prod(dim), lambda), dim)
as(a, "SparseArray")}
but is faster and more memory efficient because the intermediate dense
array \code{a} is never generated.
}
\value{
A \link{SparseArray} derivative (of class \link{SVT_SparseArray} or
\link{SVT_SparseMatrix}) with the requested dimensions and density.
The type of the returned object is \code{"double"} for
\code{randomSparseArray()} and \code{randomSparseMatrix()},
and \code{"integer"} for \code{poissonSparseArray()} and
\code{poissonSparseMatrix()}.
}
\note{
Unlike with \code{Matrix::\link[Matrix]{rsparsematrix}()} there's no
limit on the number of nonzero elements that can be contained in the
returned \link{SparseArray} object.
For example \code{Matrix::rsparsematrix(3e5, 2e4, density=0.5)} will fail
with an error but \code{randomSparseMatrix(3e5, 2e4, density=0.5)} should
work (even though it will take some time and the memory footprint of the
resulting object will be about 18 Gb).
}
\seealso{
\itemize{
\item The \code{Matrix::\link[Matrix]{rsparsematrix}} function in
the \pkg{Matrix} package.
\item The \code{stats::\link[stats]{rpois}} function in the
\pkg{stats} package.
\item \link{SVT_SparseArray} objects.
}
}
\examples{
## ---------------------------------------------------------------------
## randomSparseArray() / randomSparseMatrix()
## ---------------------------------------------------------------------
set.seed(123)
dgcm1 <- rsparsematrix(2500, 950, density=0.1)
set.seed(123)
svt1 <- randomSparseMatrix(2500, 950, density=0.1)
svt1
type(svt1) # "double"
stopifnot(identical(as(svt1, "dgCMatrix"), dgcm1))
## ---------------------------------------------------------------------
## poissonSparseArray() / poissonSparseMatrix()
## ---------------------------------------------------------------------
svt2 <- poissonSparseMatrix(2500, 950, density=0.1)
svt2
type(svt2) # "integer"
1 - sparsity(svt2) # very close to the requested density
set.seed(123)
svt3 <- poissonSparseArray(c(600, 1700, 80), lambda=0.01)
set.seed(123)
a3 <- array(rpois(length(svt3), lambda=0.01), dim(svt3))
stopifnot(identical(svt3, SparseArray(a3)))
## The memory footprint of 'svt3' is 10x smaller than that of 'a3':
object.size(svt3)
object.size(a3)
as.double(object.size(a3) / object.size(svt3))
}
\keyword{utilities}