File: is_nonzero.R

package info (click to toggle)
r-bioc-sparsearray 1.6.2%2Bdfsg-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 1,768 kB
  • sloc: ansic: 16,138; makefile: 2
file content (250 lines) | stat: -rw-r--r-- 9,419 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
### =========================================================================
### The is_nonzero() and nz*() functions
### -------------------------------------------------------------------------
###
### A set of generic functions for direct manipulation of the nonzero
### elements of an array-like object.
###


### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### is_nonzero()
###
### Returns an array-like object of type "logical".

setGeneric("is_nonzero", function(x) standardGeneric("is_nonzero"))

### Works on any vector-like or array-like object 'x' that supports 'type(x)'
### and comparison with zero ('x != zero'). One notable exception are
### ngRMatrix objects from the Matrix package because 'x != FALSE' is
### broken on these objects.
.default_is_nonzero <- function(x)
{
    ## Make sure to use 'type()' and not 'typeof()'.
    zero <- vector_of_zeros(type(x), length=1L)
    is_nonzero <- x != zero  # broken on ngRMatrix objects!
    is_nonzero | is.na(is_nonzero)
}

setMethod("is_nonzero", "ANY", .default_is_nonzero)

### IMPORTANT NOTE: Surprisingly, and unfortunately, [d|l]gCMatrix objects
### are allowed to have zeros in their '@x' slot.
### For example, with Matrix 1.3-4:
###
###   dgcm <- as(matrix(c(11:13, 0, 0, 21:25), ncol=2), "dgCMatrix")
###
### In an lgCMatrix object:
###
###   lgcm <- dgcm >= 13
###   lgcm
###   # 5 x 2 sparse Matrix of class "lgCMatrix"
###   # [1,] : |
###   # [2,] : |
###   # [3,] | |
###   # [4,] . |
###   # [5,] . |
###   lgcm@x
###   [1] FALSE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
###
### In a dgCMatrix object:
###
###   dgcm[cbind(3:4, 2)] <- 0
###   dgcm
###   # 5 x 2 sparse Matrix of class "dgCMatrix"
###   # [1,] 11 21
###   # [2,] 12 22
###   # [3,] 13  0
###   # [4,]  .  0
###   # [5,]  . 25
###   dgcm@x
###   # [1] 11 12 13 21 22  0  0 25
###
### This is still considered a valid dgCMatrix object:
###
###   validObject(dgcm)
###   # [1] TRUE
###
### Interestingly this doesn't happen when using a linear index in the
### subassignment:
###
###   dgcm[1:2] <- 0
###   dgcm@x
###   # [1] 13 21 22 25
###
### A consequence of this is that the method below is not 100% reliable
### on a [d|l]gCMatrix object because 'as(x, "nMatrix")' will report
### nonzero elements based on the pattern represented by the '@i' and '@p'
### slots of the input object, and regardless of what the corresponding
### values in its '@x' slot is. In other words, this method can return
### false positives on a [d|l]gCMatrix object.
setMethod("is_nonzero", "sparseMatrix", function(x) as(x, "nMatrix"))


### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### nzcount()
###
### Returns the number of nonzero array elements in 'x'.

setGeneric("nzcount", function(x) standardGeneric("nzcount"))

setMethod("nzcount", "ANY", function(x) sum(is_nonzero(x)))

### Not 100% reliable because [C|R|T]sparseMatrix objects are allowed to
### have zeros in their '@x' slot! See IMPORTANT NOTE above in this file.
### On such objects 'length(x@i)' (or 'length(x@j)') will report more nonzero
### elements than there really are (false positives). However, nzcount() and
### is_nonzero() are guaranteed to be consistent, which is what matters most.
setMethod("nzcount", "CsparseMatrix", function(x) length(x@i))
setMethod("nzcount", "RsparseMatrix", function(x) length(x@j))
setMethod("nzcount", "TsparseMatrix", function(x) length(x@i)) # == length(x@j)


### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### nzwhich()
###
### Returns the indices of the nonzero array elements in 'x', either as
### an L-index (if 'arr.ind=FALSE') or as an M-index (if 'arr.ind=TRUE').
### The indices must be returned sorted in strictly ascending order.
### Note that using 'arr.ind=TRUE' won't work if 'nzcount(x)' is >= 2^31.

setGeneric("nzwhich", signature="x",
    function(x, arr.ind=FALSE) standardGeneric("nzwhich")
)

default_nzwhich <- function(x, arr.ind=FALSE)
{
    if (!isTRUEorFALSE(arr.ind))
        stop(wmsg("'arr.ind' must be TRUE or FALSE"))
    which(is_nonzero(x), arr.ind=arr.ind, useNames=FALSE)
}

setMethod("nzwhich", "ANY", default_nzwhich)

### default_nzwhich() above works fine on [C|R]sparseMatrix derivatives
### (except on ngRMatrix objects) but nzwhich_CsparseMatrix() and
### nzwhich_RsparseMatrix() below are more efficient:
### - nzwhich_CsparseMatrix() is typically 10x to 50x faster than
###   default_nzwhich() on big CsparseMatrix objects;
### - nzwhich_RsparseMatrix() is only slightly faster than default_nzwhich()
###   but is provided mostly to make nzwhich() work on ngRMatrix objects
###   (default_nzwhich() doesn't work on these objects, see above).
### NOTE: nzwhich_CsparseMatrix() and nzwhich_RsparseMatrix() use a shortcut
### that is **NOT** 100% reliable on [d|l]gCMatrix or [d|l]gRMatrix objects
### because these objects are allowed to have zeros in their '@x' slot (see
### IMPORTANT NOTE above in this file). So in this case the functions will
### produce a result that contains some false positives. However, nzwhich()
### is guaranteed to be consistent with nzcount() and is_nonzero(), which is
### what matters most.
nzwhich_CsparseMatrix <- function(x, arr.ind=FALSE)
{
    if (!isTRUEorFALSE(arr.ind))
        stop(wmsg("'arr.ind' must be TRUE or FALSE"))
    x_nrow <- nrow(x)
    x_ncol <- ncol(x)
    ## If 'x' is a "long object" (i.e. length(x) >= 2^31) then we'll get
    ## an integer overflow in 'x_nrow * (seq_len(x_ncol) - 1L)' below.
    ## Coercing 'x_nrow' to double avoids that.
    if (is.double(length(x)))
        x_nrow <- as.double(x_nrow)
    offsets <- rep.int(x_nrow * (seq_len(x_ncol) - 1L), diff(x@p))
    ans <- x@i + offsets + 1L
    if (!arr.ind)
        return(ans)
    Lindex2Mindex(ans, dim(x))
}

nzwhich_RsparseMatrix <- function(x, arr.ind=FALSE)
{
    if (!isTRUEorFALSE(arr.ind))
        stop(wmsg("'arr.ind' must be TRUE or FALSE"))
    x_nrow <- nrow(x)
    x_ncol <- ncol(x)
    if (is.double(length(x)))
        x_ncol <- as.double(x_ncol)  # see nzwhich_CsparseMatrix() above
    offsets <- rep.int(x_ncol * (seq_len(x_nrow) - 1L), diff(x@p))
    transposed_nzwhich <- x@j + offsets + 1L
    transposed_arr_ind <- Lindex2Mindex(transposed_nzwhich, rev(dim(x)))
    transposed_arr_ind1 <- transposed_arr_ind[ , 1L]
    transposed_arr_ind2 <- transposed_arr_ind[ , 2L]
    oo <- order(transposed_arr_ind1, transposed_arr_ind2)
    arr_ind1 <- transposed_arr_ind2[oo]
    arr_ind2 <- transposed_arr_ind1[oo]
    arr_ind <- cbind(arr_ind1, arr_ind2, deparse.level=0)
    if (arr.ind)
        return(arr_ind)
    Mindex2Lindex(arr_ind, dim(x))
}

setMethod("nzwhich", "CsparseMatrix", nzwhich_CsparseMatrix)
setMethod("nzwhich", "RsparseMatrix", nzwhich_RsparseMatrix)


### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### nzvals()
###
### Extracts the nonzero values from 'x'. The nonzero values are returned
### in a vector of the same type() as 'x' and parallel to nzwhich(x).
### Equivalent to 'x[nzwhich(x)]' (and that's what the default method
### below does). However, specialized methods have the potential to make
### this dramatically faster.

setGeneric("nzvals", function(x) standardGeneric("nzvals"))

### Assumes that array-like object 'x' supports subsetting by a linear
### numeric subscript (L-index).
### Notes:
### - Subsetting by an M-index i.e. subsetting by 'nzwhich(x, arr.ind=TRUE)'
###   instead of 'nzwhich(x)' would work too but only when 'nzcount(x)'
###   is < 2^31.
### - The call to as.vector() should not be necessary since the result of
###   subsetting 'x' by an L- or M-index should already be a vector (atomic
###   or list). However, in the case of a 1D ordinary array, it's not!
###   For example, 'array(11:15)[matrix(3:1)]' is a 1D array.
###   Consider this a bug in base::`[`.
### TODO: Maybe change this to 'x[is_nonzero(x)]'? But do some timings first.
setMethod("nzvals", "ANY", function(x) as.vector(x[nzwhich(x)]))

### Not 100% reliable because [d|l]gCMatrix objects can have zeros in
### their '@x' slot (see IMPORTANT NOTE above in this file), but consistent
### with nzwhich(), nzcount(), and is_nonzero() on these objects.
setMethod("nzvals", "dgCMatrix", function(x) x@x)
setMethod("nzvals", "lgCMatrix", function(x) x@x)

setMethod("nzvals", "nMatrix", function(x) rep.int(TRUE, nzcount(x)))


### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### `nzvals<-`()
###
### Replaces the values of the nonzero array elements in 'x'.
### Equivalent to 'x[nzwhich(x)] <- value' (and that's what the default
### method below does). However specialized methods have the potential to
### make this dramatically faster.
### Notes:
### - 'nzvals(x) <- nzvals(x)' should **always** be a no-op.
### - 'nzvals(x) <- 0L' will wipe out all nonzero values from 'x'.

setGeneric("nzvals<-", signature="x",
    function(x, value) standardGeneric("nzvals<-")
)

### TODO: Maybe change this to 'x[is_nonzero(x)] <- value'? But do some
### timings first.
setReplaceMethod("nzvals", "ANY",
    function(x, value)
    {
        if (!is.vector(value))
            stop(wmsg("replacement value must be a vector"))
        x[nzwhich(x)] <- value
        x
    }
)


### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### sparsity()
###

sparsity <- function(x) { 1 - nzcount(x) / length(x) }