File: ScaledMatrix.R

package info (click to toggle)
r-bioc-scaledmatrix 1.6.0%2Bdfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 172 kB
  • sloc: makefile: 2
file content (197 lines) | stat: -rw-r--r-- 7,685 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
#' The ScaledMatrix class
#'
#' Defines the ScaledMatrixSeed and ScaledMatrix classes and their associated methods.
#' These classes support delayed centering and scaling of the columns in the same manner as \code{\link{scale}},
#' but preserving the original data structure for more efficient operations like matrix multiplication.
#' 
#' @param x A matrix or any matrix-like object (e.g., from the \pkg{Matrix} package).
#' 
#' This can alternatively be a ScaledMatrixSeed, in which case any values of \code{center} and \code{scale} are ignored.
#' @param center A numeric vector of length equal to \code{ncol(x)}, where each element is to be subtracted from the corresponding column of \code{x}.
#' A \code{NULL} value indicates that no subtraction is to be performed.
#' Alternatively \code{TRUE}, in which case it is set to the column means of \code{x}.
#' @param scale A numeric vector of length equal to \code{ncol(x)}, where each element is to divided from the corresponding column of \code{x} (after subtraction).
#' A \code{NULL} value indicates that no division is to be performed.
#' Alternatively \code{TRUE}, in which case it is set to the column-wise root-mean-squared differences from \code{center} 
#' (interpretable as standard deviations if \code{center} is set to the column means, see \code{\link{scale}} for commentary).
#' 
#' @return 
#' The \code{ScaledMatrixSeed} constructor will return a ScaledMatrixSeed object.
#' 
#' The \code{ScaledMatrix} constructor will return a ScaledMatrix object equivalent to \code{t((t(x) - center)/scale)}.
#' 
#' @section Methods for ScaledMatrixSeed objects:
#' ScaledMatrixSeed objects are implemented as \linkS4class{DelayedMatrix} backends.
#' They support standard operations like \code{dim}, \code{dimnames} and \code{extract_array}.
#' 
#' Passing a ScaledMatrixSeed object to the \code{\link{DelayedArray}} constructor will create a ScaledMatrix object.
#' 
#' It is possible for \code{x} to contain a ScaledMatrix, thus nesting one ScaledMatrix inside another.
#' This can occasionally be useful in combination with transposition to achieve centering/scaling in both dimensions.
#' 
#' @section Methods for ScaledMatrix objects:
#' ScaledMatrix objects are derived from \linkS4class{DelayedMatrix} objects and support all of valid operations on the latter.
#' Several functions are specialized for greater efficiency when operating on ScaledMatrix instances, including:
#' \itemize{
#'     \item Subsetting, transposition and replacement of row/column names.
#'         These will return a new ScaledMatrix rather than a DelayedMatrix.
#'     \item Matrix multiplication via \code{\%*\%}, \code{crossprod} and \code{tcrossprod}.
#'         These functions will return a DelayedMatrix.
#'     \item Calculation of row and column sums and means by \code{colSums}, \code{rowSums}, etc. 
#' }
#' 
#' All other operations applied to a ScaledMatrix will use the underlying \pkg{DelayedArray} machinery.
#' Unary or binary operations will generally create a new DelayedMatrix instance containing a ScaledMatrixSeed.
#' 
#' Tranposition can effectively be used to allow centering/scaling on the rows if the input \code{x} is transposed.
#'
#' @section Efficiency vs precision:
#' The raison d'etre of the ScaledMatrix is that it can offer faster matrix multiplication by avoiding the \pkg{DelayedArray} block processing.
#' This is done by refactoring the scaling/centering operations to use the (hopefully more efficient) multiplication operator of the original matrix \code{x}.
#' Unfortunately, the speed-up comes at the cost of increasing the risk of catastrophic cancellation.
#' The procedure requires subtraction of one large intermediate number from another to obtain the values of the final matrix product.
#' This could result in a loss of numerical precision that compromises the accuracy of downstream algorithms. 
#' In practice, this does not seem to be a major concern though one should be careful if the input \code{x} contains very large positive/negative values.
#' 
#' @author
#' Aaron Lun
#' 
#' @examples
#' library(Matrix)
#' y <- ScaledMatrix(rsparsematrix(10, 20, 0.1), 
#'     center=rnorm(20), scale=1+runif(20))
#' y
#' 
#' crossprod(y)
#' tcrossprod(y)
#' y %*% rnorm(20)
#' 
#' @aliases
#' ScaledMatrixSeed
#' ScaledMatrixSeed-class
#' 
#' dim,ScaledMatrixSeed-method
#' dimnames,ScaledMatrixSeed-method
#' extract_array,ScaledMatrixSeed-method
#' DelayedArray,ScaledMatrixSeed-method
#' show,ScaledMatrixSeed-method
#' 
#' ScaledMatrix
#' ScaledMatrix-class
#' 
#' dimnames<-,ScaledMatrix,ANY-method
#' t,ScaledMatrix-method
#' [,ScaledMatrix,ANY,ANY,ANY-method
#' 
#' colSums,ScaledMatrix-method
#' rowSums,ScaledMatrix-method
#' colMeans,ScaledMatrix-method
#' rowMeans,ScaledMatrix-method
#' 
#' %*%,ANY,ScaledMatrix-method
#' %*%,ScaledMatrix,ANY-method
#' %*%,ScaledMatrix,ScaledMatrix-method
#' 
#' crossprod,ScaledMatrix,missing-method
#' crossprod,ScaledMatrix,ANY-method
#' crossprod,ANY,ScaledMatrix-method
#' crossprod,ScaledMatrix,ScaledMatrix-method
#' 
#' tcrossprod,ScaledMatrix,missing-method
#' tcrossprod,ScaledMatrix,ANY-method
#' tcrossprod,ANY,ScaledMatrix-method
#' tcrossprod,ScaledMatrix,ScaledMatrix-method
#' 
#' @docType class
#' @name ScaledMatrix
NULL

#' @export
#' @rdname ScaledMatrix
#' @importFrom DelayedArray DelayedArray
#' @importFrom Matrix colMeans rowSums t
ScaledMatrix <- function(x, center=NULL, scale=NULL) {
    if (isTRUE(center)) {
        center <- colMeans(x)
    }
    if (isTRUE(scale)) {
        tx <- t(DelayedArray(x))
        if (!is.null(center)) {
            tx <- tx - center
        }
        ss <- rowSums(tx^2)
        scale <- sqrt(ss / (nrow(x) - 1))
    }
    DelayedArray(ScaledMatrixSeed(x, center=center, scale=scale))
}

#' @export
#' @importFrom DelayedArray DelayedArray new_DelayedArray
setMethod("DelayedArray", "ScaledMatrixSeed",
    function(seed) new_DelayedArray(seed, Class="ScaledMatrix")
)

###################################
# Overridden utilities from DelayedArray, for efficiency.

#' @export
#' @importFrom DelayedArray DelayedArray seed
setReplaceMethod("dimnames", "ScaledMatrix", function(x, value) {
    DelayedArray(rename_ScaledMatrixSeed(seed(x), value))
})

#' @export
#' @importFrom DelayedArray DelayedArray seed
setMethod("t", "ScaledMatrix", function(x) {
    DelayedArray(transpose_ScaledMatrixSeed(seed(x)))
})

#' @export
#' @importFrom DelayedArray DelayedArray seed
setMethod("[", "ScaledMatrix", function(x, i, j, ..., drop=TRUE) {
    if (missing(i)) i <- NULL
    if (missing(j)) j <- NULL
    out <- DelayedArray(subset_ScaledMatrixSeed(seed(x), i=i, j=j))

    if (drop && any(dim(out)==1L)) {
        return(drop(out))
    }
    out
})

###################################
# Basic matrix stats.

#' @export
#' @importFrom Matrix colSums rowSums drop
setMethod("colSums", "ScaledMatrix", function(x, na.rm = FALSE, dims = 1L) {
    if (is_transposed(seed(x))) {
        return(rowSums(t(x)))
    }

    out <- rep(1, nrow(x)) %*% x
    out <- drop(out)
    names(out) <- colnames(x)
    out
})

#' @export
#' @importFrom Matrix colSums rowSums drop
setMethod("rowSums", "ScaledMatrix", function(x, na.rm = FALSE, dims = 1L) {
    if (is_transposed(seed(x))) {
        return(colSums(t(x)))
    }

    out <- x %*% rep(1, ncol(x))
    out <- drop(out)
    names(out) <- rownames(x)
    out
})

#' @export
#' @importFrom Matrix colMeans colSums
setMethod("colMeans", "ScaledMatrix", function(x, na.rm = FALSE, dims = 1L) colSums(x)/nrow(x))

#' @export
#' @importFrom Matrix rowMeans rowSums
setMethod("rowMeans", "ScaledMatrix", function(x, na.rm = FALSE, dims = 1L) rowSums(x)/ncol(x))