File: randomSparseArray.Rd

package info (click to toggle)
r-bioc-sparsearray 1.6.2%2Bdfsg-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 1,768 kB
  • sloc: ansic: 16,138; makefile: 2
file content (141 lines) | stat: -rw-r--r-- 4,901 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
\name{randomSparseArray}

\alias{randomSparseArray}
\alias{randomSparseMatrix}
\alias{poissonSparseArray}
\alias{poissonSparseMatrix}

\title{Random SparseArray object}

\description{
  \code{randomSparseArray()} and \code{poissonSparseArray()} can be used
  to generate a random \link{SparseArray} object efficiently.
}

\usage{
randomSparseArray(dim, density=0.05, dimnames=NULL)
poissonSparseArray(dim, lambda=-log(0.95), density=NA, dimnames=NULL)

## Convenience wrappers for the 2D case:
randomSparseMatrix(nrow, ncol, density=0.05, dimnames=NULL)
poissonSparseMatrix(nrow, ncol, lambda=-log(0.95), density=NA,
                    dimnames=NULL)
}

\arguments{
  \item{dim}{
    The dimensions (specified as an integer vector) of the \link{SparseArray}
    object to generate.
  }
  \item{density}{
    The desired density (specified as a number >= 0 and <= 1) of the
    \link{SparseArray} object to generate, that is, the ratio between its
    number of nonzero elements and its total number of elements.
    This is \code{nzcount(x)/length(x)} or \code{1 - sparsity(x)}.

    Note that for \code{poissonSparseArray()} and \code{poissonSparseMatrix()},
    \code{density} must be < 1, and the \emph{actual} density of the returned
    object won't be exactly as requested but will typically be very close.
  }
  \item{dimnames}{
    The \emph{dimnames} to put on the object to generate. Must be \code{NULL}
    or a list with one list element per dimension. Each list element must be
    either \code{NULL} or a character vector along the corresponding dimension.
  }
  \item{lambda}{
    The mean of the Poisson distribution. Passed internally to the calls
    to \code{\link[stats]{rpois}()}.

    Only one of \code{lambda} and \code{density} can be specified.

    When \code{density} is requested, \code{rpois()} is called internally
    with \code{lambda} set to \code{-log(1 - density)}. This is expected
    to generate Poisson data with the requested density, because a Poisson
    variable is zero with probability \code{exp(-lambda)}.

    Finally note that the default value for \code{lambda} corresponds to
    a requested density of 0.05.
  }
  \item{nrow, ncol}{
    Number of rows and columns of the \link{SparseMatrix} object to generate.
  }
}

\details{
  \code{randomSparseArray()} mimics the \code{\link[Matrix]{rsparsematrix}()}
  function from the \pkg{Matrix} package but returns a \link{SparseArray}
  object instead of a dgCMatrix object.

  \code{poissonSparseArray()} populates a \link{SparseArray} object with
  Poisson data, that is, it is equivalent to:
  \preformatted{    a <- array(rpois(prod(dim), lambda), dim)
    as(a, "SparseArray")}
  but is faster and more memory efficient because the intermediate dense
  array \code{a} is never generated.
}

\value{
  A \link{SparseArray} derivative (of class \link{SVT_SparseArray} or
  \link{SVT_SparseMatrix}) with the requested dimensions and density.

  The type of the returned object is \code{"double"} for
  \code{randomSparseArray()} and \code{randomSparseMatrix()},
  and \code{"integer"} for \code{poissonSparseArray()} and
  \code{poissonSparseMatrix()}.
}

\note{
  Unlike with \code{Matrix::\link[Matrix]{rsparsematrix}()} there's no
  limit on the number of nonzero elements that can be contained in the
  returned \link{SparseArray} object.

  For example \code{Matrix::rsparsematrix(3e5, 2e4, density=0.5)} will fail
  with an error but \code{randomSparseMatrix(3e5, 2e4, density=0.5)} should
  work (even though it will take some time and the memory footprint of the
  resulting object will be about 18 Gb).
}

\seealso{
  \itemize{
    \item The \code{Matrix::\link[Matrix]{rsparsematrix}} function in
          the \pkg{Matrix} package.

    \item The \code{stats::\link[stats]{rpois}} function in the
          \pkg{stats} package.

    \item \link{SVT_SparseArray} objects.
  }
}

\examples{
## ---------------------------------------------------------------------
## randomSparseArray() / randomSparseMatrix()
## ---------------------------------------------------------------------
## Generate a reference matrix with rsparsematrix(), then reset the seed
## and generate a SparseMatrix with the same dimensions and density.
set.seed(123)
dgcm1 <- rsparsematrix(2500, 950, density=0.1)
set.seed(123)
svt1 <- randomSparseMatrix(2500, 950, density=0.1)
svt1
type(svt1)  # "double"

## With the same seed, 'svt1' coerced to dgCMatrix must be identical
## to 'dgcm1':
stopifnot(identical(as(svt1, "dgCMatrix"), dgcm1))

## ---------------------------------------------------------------------
## poissonSparseArray() / poissonSparseMatrix()
## ---------------------------------------------------------------------
## Request a density rather than supplying 'lambda':
svt2 <- poissonSparseMatrix(2500, 950, density=0.1)
svt2
type(svt2)  # "integer"
1 - sparsity(svt2)  # very close to the requested density

## With the same seed, poissonSparseArray() must produce the same data
## as the dense rpois()-based construction:
set.seed(123)
svt3 <- poissonSparseArray(c(600, 1700, 80), lambda=0.01)
set.seed(123)
a3 <- array(rpois(length(svt3), lambda=0.01), dim(svt3))
stopifnot(identical(svt3, SparseArray(a3)))

## The memory footprint of 'svt3' is 10x smaller than that of 'a3':
object.size(svt3)
object.size(a3)
as.double(object.size(a3) / object.size(svt3))  # footprint ratio
}
\keyword{utilities}