File: randomSparseArray.R

package info (click to toggle)
r-bioc-sparsearray 1.6.2%2Bdfsg-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 1,768 kB
  • sloc: ansic: 16,138; makefile: 2
file content (163 lines) | stat: -rw-r--r-- 5,846 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
### =========================================================================
### randomSparseArray() and poissonSparseArray()
### -------------------------------------------------------------------------


### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### randomSparseArray()
###

### Returns an SVT_SparseArray object of type "double".
randomSparseArray <- function(dim, density=0.05, dimnames=NULL)
{
    if (!is.numeric(dim))
        stop(wmsg("'dim' must be an integer vector"))
    if (!is.integer(dim))
        dim <- as.integer(dim)
    if (!isSingleNumber(density) || density < 0 || density > 1)
        stop(wmsg("'density' must be a number >= 0 and <= 1"))

    ## Start with an empty sparse array.
    ans <- new_SVT_SparseArray(dim, dimnames=dimnames, type="double")

    ## Add the nonzero values to it.
    ans_len <- length(ans)
    nzcount <- as.integer(ans_len * density)
    Lindex <- sample.int(ans_len, nzcount)
    nzvals <- signif(rnorm(nzcount), 2)
    if (nzcount <= .Machine$integer.max) {
        ## Using an Mindex seems to be slightly faster (4%-5%) than using an
        ## Lindex but we can only do this when the resulting Mindex matrix
        ## has < 2^31 rows.
        ans[Lindex2Mindex(Lindex, dim(ans))] <- nzvals
    } else {
        ans[Lindex] <- nzvals
    }

    ans
}

randomSparseMatrix <- function(nrow=1L, ncol=1L, density=0.05, dimnames=NULL)
{
    if (!isSingleNumber(nrow) || !isSingleNumber(ncol))
        stop(wmsg("'nrow' and 'ncol' must be single integers"))
    randomSparseArray(c(nrow, ncol), density=density, dimnames=dimnames)
}


### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### poissonSparseArray()
###

### Like stats::rpois() but slightly faster and implementation is much
### simpler. Only for 0 <= 'lambda' <= 4.
### NOT exported.
simple_rpois <- function(n, lambda)
    SparseArray.Call("C_simple_rpois", n, lambda)

### Returns an SVT_SparseArray object of type "integer".
### Density of the returned object is expected to be about '1 - exp(-lambda)'.
### Default for 'lambda' is set to -log(0.95) which should produce an object
### with an expected density of 0.05.
poissonSparseArray <- function(dim, lambda=-log(0.95), density=NA,
                               dimnames=NULL)
{
    dim <- S4Arrays:::normarg_dim(dim)

    if (!missing(lambda) && !identical(density, NA))
        stop(wmsg("only one of 'lambda' and 'density' can be specified"))
    if (!missing(lambda)) {
        if (!isSingleNumber(lambda) || lambda < 0)
            stop(wmsg("'lambda' must be a non-negative number"))
    } else if (!identical(density, NA)) {
        if (!isSingleNumber(density) || density < 0 || density >= 1)
            stop(wmsg("'density' must be a number >= 0 and < 1"))
        lambda <- -log(1 - density)
    }

    ans_SVT <- SparseArray.Call("C_poissonSparseArray", dim, lambda)
    new_SVT_SparseArray(dim, dimnames=dimnames, type="integer", SVT=ans_SVT,
                        check=FALSE)
}

### Replacement for rpois() when 'n' is big and 'lambda' is small.
### For example:
###     .sparse_rpois(3e9, 0.005)  # takes about 1 min. and uses < 1G of RAM
###     rpois(3e9, 0.005)          # takes about 55 sec. and uses 12G of RAM
.sparse_rpois <- function(n, lambda, chunksize=5e6L)
{
    if (n == 0L)
        return(list(integer(0), integer(0)))
    nzidx_env <- new.env(parent=emptyenv())
    nzvals_env <- new.env(parent=emptyenv())
    offset <- 0  # double to avoid integer overflow when n >= 2^31
    k <- 1L
    while (offset < n) {
        nn <- n - offset
        if (nn > chunksize)
            nn <- chunksize
        vals <- rpois(nn, lambda)
        nzidx <- nzwhich(vals)
        key <- sprintf("%04d", k)
        assign(key, offset + nzidx, envir=nzidx_env)
        assign(key, vals[nzidx], envir=nzvals_env)
        offset <- offset + nn
        k <- k + 1L
    }
    nzidx <- as.list(nzidx_env, all.names=TRUE, sorted=TRUE)
    nzidx <- unlist(nzidx, recursive=FALSE, use.names=FALSE)
    nzvals <- as.list(nzvals_env, all.names=TRUE, sorted=TRUE)
    nzvals <- unlist(nzvals, recursive=FALSE, use.names=FALSE)
    list(nzidx, nzvals)
}

### Solution based on .sparse_rpois(). Equivalent to poissonSparseArray()
### but slower and uses more memory e.g.
###
###     poissonSparseArray2(c(1e5, 2e4), density=0.02)
###
### is about 3x slower uses about 2.5x more memory than
###
###     poissonSparseArray(c(1e5, 2e4), density=0.02)
###
### NOT exported
poissonSparseArray2 <- function(dim, lambda=-log(0.95), density=NA)
{
    dim <- S4Arrays:::normarg_dim(dim)

    if (!missing(lambda) && !identical(density, NA))
        stop(wmsg("only one of 'lambda' and 'density' can be specified"))
    if (!missing(lambda)) {
        if (!isSingleNumber(lambda) || lambda < 0)
            stop(wmsg("'lambda' must be a non-negative number"))
    } else {
        if (!isSingleNumber(density) || density < 0 || density >= 1)
            stop(wmsg("'density' must be a number >= 0 and < 1"))
        lambda <- -log(1 - density)
    }

    ## Start with an empty sparse array.
    ans <- new_SVT_SparseArray(dim, type="integer")

    ## Add the nonzero values to it.
    ans_len <- length(ans)
    srp <- .sparse_rpois(ans_len, lambda)
    ans[srp[[1L]]] <- srp[[2L]]

    ans
}

poissonSparseMatrix <- function(nrow=1L, ncol=1L, lambda=-log(0.95), density=NA,
                                dimnames=NULL)
{
    if (!isSingleNumber(nrow) || !isSingleNumber(ncol))
        stop(wmsg("'nrow' and 'ncol' must be single integers"))
    if (missing(lambda)) {
        poissonSparseArray(c(nrow, ncol), density=density,
                           dimnames=dimnames)
    } else {
        poissonSparseArray(c(nrow, ncol), lambda=lambda, density=density,
                           dimnames=dimnames)
    }
}