1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202
|
\name{rankMatrix}
\title{Rank of a Matrix}
%
\keyword{algebra}
\keyword{utilities}
%
\alias{rankMatrix}
\alias{qr2rankMatrix}
%
\description{
Compute \sQuote{the} matrix rank, a well-defined functional in theory(*),
somewhat ambiguous in practice. We provide several methods, the
default corresponding to Matlab's definition.
(*) The rank of a \eqn{n \times m}{n x m} matrix \eqn{A}, \eqn{rk(A)},
is the maximal number of linearly independent columns (or rows); hence
\eqn{rk(A) \le min(n,m)}{rk(A) <= min(n,m)}.
}
\usage{
rankMatrix(x, tol = NULL,
method = c("tolNorm2", "qr.R", "qrLINPACK", "qr",
"useGrad", "maybeGrad"),
sval = svd(x, 0, 0)$d, warn.t = TRUE, warn.qr = TRUE)
qr2rankMatrix(qr, tol = NULL, isBqr = is.qr(qr), do.warn = TRUE)
}
\arguments{
\item{x}{numeric matrix, of dimension \eqn{n \times m}{n x m}, say.}
\item{tol}{nonnegative number specifying a (relative,
\dQuote{scalefree}) tolerance for testing of
\dQuote{practically zero} with specific meaning depending on
\code{method}; by default, \code{max(dim(x)) * \link{.Machine}$double.eps}
is according to Matlab's default (for its only method which is our
\code{method="tolNorm2"}).}
\item{method}{a character string specifying the computational method
for the rank, can be abbreviated:
\describe{
\item{\code{"tolNorm2"}:}{the number of singular values
\code{>= tol * max(sval)};}
\item{\code{"qrLINPACK"}:}{for a dense matrix, this is the rank of
\code{\link[base]{qr}(x, tol, LAPACK=FALSE)} (which is
\code{qr(...)$rank});
\cr
This ("qr*", dense) version used to be \emph{the} recommended way to
compute a matrix rank for a while in the past.
For sparse \code{x}, this is equivalent to \code{"qr.R"}.
}
\item{\code{"qr.R"}:}{this is the rank of triangular matrix
\eqn{R}, where \code{qr()} uses LAPACK or a "sparseQR" method
(see \code{\link{qr-methods}}) to compute the decomposition
\eqn{QR}. The rank of \eqn{R} is then defined as the number of
\dQuote{non-zero} diagonal entries \eqn{d_i} of \eqn{R}, and
\dQuote{non-zero}s fulfill
\eqn{|d_i| \ge \mathrm{tol}\cdot\max(|d_i|)}{|d_i| >= tol * max(|d_i|)}.
}
\item{\code{"qr"}:}{is for back compatibility; for dense \code{x},
it corresponds to \code{"qrLINPACK"}, whereas for sparse
\code{x}, it uses \code{"qr.R"}.
For all the "qr*" methods, singular values \code{sval} are not
used, which may be crucially important for a large sparse matrix
\code{x}, as in that case, when \code{sval} is not specified,
the default, computing \code{\link{svd}()} currently coerces
\code{x} to a dense matrix.
}
\item{\code{"useGrad"}:}{considering the \dQuote{gradient} of the
(decreasing) singular values, the index of the \emph{smallest} gap.}
\item{\code{"maybeGrad"}:}{choosing method \code{"useGrad"} only when
that seems \emph{reasonable}; otherwise using \code{"tolNorm2"}.}
%% FIXME say more
}
}
\item{sval}{numeric vector of non-increasing singular values of
\code{x}; typically unspecified and computed from \code{x} when
needed, i.e., unless \code{method = "qr"}.}
\item{warn.t}{logical indicating if \code{rankMatrix()} should warn
when it needs \code{\link{t}(x)} instead of \code{x}. Currently, for
\code{method = "qr"} only, gives a warning by default because the
caller often could have passed \code{t(x)} directly, more efficiently.}
\item{warn.qr}{in the \eqn{QR} cases (i.e., if \code{method} starts with
\code{"qr"}), \code{rankMatrix()} calls
\code{qr2rankMarix(.., do.warn = warn.qr)}, see below.}
%% qr2rankMatrix():
\item{qr}{an \R object resulting from \code{\link{qr}(x,..)}, i.e.,
typically inheriting from \code{\link{class}} \code{"\link{qr}"} or
\code{"\linkS4class{sparseQR}"}.}
\item{isBqr}{\code{\link{logical}} indicating if \code{qr} is resulting
from \pkg{base} \code{\link[base]{qr}()}. (Otherwise, it is typically
from \pkg{Matrix} package sparse \code{\link[Matrix]{qr}}.)}
\item{do.warn}{logical; if true, warn about non-finite diagonal
entries in the \eqn{R} matrix of the \eqn{QR} decomposition.
Do not change lightly!}
}
\details{
\code{qr2rankMatrix()} is typically called from \code{rankMatrix()} for
the \code{"qr"}* \code{method}s, but can be used directly - much more
efficiently in case the \code{qr}-decomposition is available anyway.
}
\note{For large sparse matrices \code{x}, unless you can specify
\code{sval} yourself, currently \code{method = "qr"} may
be the only feasible one, as the others need \code{sval} and call
\code{\link{svd}()} which currently coerces \code{x} to a
\code{\linkS4class{denseMatrix}} which may be very slow or impossible,
depending on the matrix dimensions.
Note that in the case of sparse \code{x}, \code{method = "qr"}, all
non-strictly zero diagonal entries \eqn{d_i} where counted, up to
including \pkg{Matrix} version 1.1-0, i.e., that method implicitly
used \code{tol = 0}, see also the \code{set.seed(42)} example below.
}
\value{
If \code{x} is a matrix of all \code{0} (or of zero dimension), the rank
is zero; otherwise, typically a positive integer in \code{1:min(dim(x))}
with attributes detailing the method used.
There are rare cases where the sparse \eqn{QR} decomposition
\dQuote{fails} in so far as the diagonal entries of \eqn{R}, the
\eqn{d_i} (see above), end with non-finite, typically \code{\link{NaN}}
entries. Then, a warning is signalled (unless \code{warn.qr} /
\code{do.warn} is not true) and \code{NA} (specifically,
\code{\link{NA_integer_}}) is returned.
}
% \references{
% %% ~put references to the literature/web site here ~
% }
\author{Martin Maechler; for the "*Grad" methods building on
suggestions by Ravi Varadhan.
}
\seealso{
\code{\link{qr}}, \code{\link{svd}}.
}
\examples{
\dontshow{ % for R_DEFAULT_PACKAGES=NULL
library(stats, pos = "package:base", verbose = FALSE)
}
rankMatrix(cbind(1, 0, 1:3)) # 2
(meths <- eval(formals(rankMatrix)$method))
## a "border" case:
H12 <- Hilbert(12)
rankMatrix(H12, tol = 1e-20) # 12; but 11 with default method & tol.
sapply(meths, function(.m.) rankMatrix(H12, method = .m.))
## tolNorm2 qr.R qrLINPACK qr useGrad maybeGrad
## 11 11 12 12 11 11
## The meaning of 'tol' for method="qrLINPACK" and *dense* x is not entirely "scale free"
rMQL <- function(ex, M) rankMatrix(M, method="qrLINPACK",tol = 10^-ex)
rMQR <- function(ex, M) rankMatrix(M, method="qr.R", tol = 10^-ex)
sapply(5:15, rMQL, M = H12) # result is platform dependent
## 7 7 8 10 10 11 11 11 12 12 12 {x86_64}
sapply(5:15, rMQL, M = 1000 * H12) # not identical unfortunately
## 7 7 8 10 11 11 12 12 12 12 12
sapply(5:15, rMQR, M = H12)
## 5 6 7 8 8 9 9 10 10 11 11
sapply(5:15, rMQR, M = 1000 * H12) # the *same*
\dontshow{
(r12 <- sapply(5:15, rMQR, M = H12))
stopifnot(identical(r12, sapply(5:15, rMQR, M = H12 / 100)),
identical(r12, sapply(5:15, rMQR, M = H12 * 1e5)))
rM1 <- function(ex, M) rankMatrix(M, tol = 10^-ex)
(r12 <- sapply(5:15, rM1, M = H12))
stopifnot(identical(r12, sapply(5:15, rM1, M = H12 / 100)),
identical(r12, sapply(5:15, rM1, M = H12 * 1e5)))
}
## "sparse" case:
M15 <- kronecker(diag(x=c(100,1,10)), Hilbert(5))
sapply(meths, function(.m.) rankMatrix(M15, method = .m.))
#--> all 15, but 'useGrad' has 14.
sapply(meths, function(.m.) rankMatrix(M15, method = .m., tol = 1e-7)) # all 14
## "large" sparse
n <- 250000; p <- 33; nnz <- 10000
L <- sparseMatrix(i = sample.int(n, nnz, replace=TRUE),
j = sample.int(p, nnz, replace=TRUE),
x = rnorm(nnz))
(st1 <- system.time(r1 <- rankMatrix(L))) # warning+ ~1.5 sec (2013)
(st2 <- system.time(r2 <- rankMatrix(L, method = "qr"))) # considerably faster!
r1[[1]] == print(r2[[1]]) ## --> ( 33 TRUE )
\dontshow{
stopifnot(r1[[1]] == 33, 33 == r2[[1]])
if(interactive() || nzchar(Sys.getenv("R_MATRIX_CHECK_EXTRA")))
stopifnot(st2[[1]] < 0.2) # seeing 0.03 (on ~ 2010-hardware; R 3.0.2)
}
## another sparse-"qr" one, which ``failed'' till 2013-11-23:
set.seed(42)
f1 <- factor(sample(50, 1000, replace=TRUE))
f2 <- factor(sample(50, 1000, replace=TRUE))
f3 <- factor(sample(50, 1000, replace=TRUE))
D <- t(do.call(rbind, lapply(list(f1,f2,f3), as, 'sparseMatrix')))
dim(D); nnzero(D) ## 1000 x 150 // 3000 non-zeros (= 2\%)
stopifnot(rankMatrix(D, method='qr') == 148,
rankMatrix(crossprod(D),method='qr') == 148)
## zero matrix has rank 0 :
stopifnot(sapply(meths, function(.m.)
rankMatrix(matrix(0, 2, 2), method = .m.)) == 0)
}
|