File: rankMatrix.Rd

package info (click to toggle)
rmatrix 1.7-4-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 12,096 kB
  • sloc: ansic: 97,203; makefile: 280; sh: 165
file content (202 lines) | stat: -rw-r--r-- 8,734 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
\name{rankMatrix}
\title{Rank of a Matrix}
%
\keyword{algebra}
\keyword{utilities}
%
\alias{rankMatrix}
\alias{qr2rankMatrix}
%
\description{
  Compute \sQuote{the} matrix rank, a well-defined functional in theory(*),
  somewhat ambiguous in practice.  We provide several methods, the
  default corresponding to Matlab's definition.

  (*) The rank of a \eqn{n \times m}{n x m} matrix \eqn{A}, \eqn{rk(A)},
      is the maximal number of linearly independent columns (or rows); hence
      \eqn{rk(A) \le min(n,m)}{rk(A) <= min(n,m)}.
}
\usage{
rankMatrix(x, tol = NULL,
           method = c("tolNorm2", "qr.R", "qrLINPACK", "qr",
                      "useGrad", "maybeGrad"),
           sval = svd(x, 0, 0)$d, warn.t = TRUE, warn.qr = TRUE)

qr2rankMatrix(qr, tol = NULL, isBqr = is.qr(qr), do.warn = TRUE)
}
\arguments{
  \item{x}{numeric matrix, of dimension \eqn{n \times m}{n x m}, say.}
  \item{tol}{nonnegative number specifying a (relative,
    \dQuote{scalefree}) tolerance for testing of
    \dQuote{practically zero} with specific meaning depending on
    \code{method}; by default, \code{max(dim(x)) * \link{.Machine}$double.eps}
    is according to Matlab's default (for its only method which is our
    \code{method="tolNorm2"}).}
  \item{method}{a character string specifying the computational method
    for the rank, can be abbreviated:
    \describe{
      \item{\code{"tolNorm2"}:}{the number of singular values
	\code{>= tol * max(sval)};}
      \item{\code{"qrLINPACK"}:}{for a dense matrix, this is the rank of
	\code{\link[base]{qr}(x, tol, LAPACK=FALSE)} (which is
	\code{qr(...)$rank});
	\cr
	This ("qr*", dense) version used to be \emph{the} recommended way to
	compute a matrix rank for a while in the past.

	For sparse \code{x}, this is equivalent to \code{"qr.R"}.
      }
      \item{\code{"qr.R"}:}{this is the rank of triangular matrix
	\eqn{R}, where \code{qr()} uses LAPACK or a "sparseQR" method
	(see \code{\link{qr-methods}}) to compute the decomposition
	\eqn{QR}.  The rank of \eqn{R} is then defined as the number of
	\dQuote{non-zero} diagonal entries \eqn{d_i} of \eqn{R}, and
	\dQuote{non-zero}s fulfill
	\eqn{|d_i| \ge \mathrm{tol}\cdot\max(|d_i|)}{|d_i| >= tol * max(|d_i|)}.
      }
      \item{\code{"qr"}:}{is for back compatibility; for dense \code{x},
	it corresponds to \code{"qrLINPACK"}, whereas for sparse
	\code{x}, it uses \code{"qr.R"}.

	For all the "qr*" methods, singular values \code{sval} are not
	used, which may be crucially important for a large sparse matrix
	\code{x}, as in that case, when \code{sval} is not specified,
	the default, computing \code{\link{svd}()} currently coerces
	\code{x} to a dense matrix.
      }
      \item{\code{"useGrad"}:}{considering the \dQuote{gradient} of the
	(decreasing) singular values, the index of the \emph{smallest} gap.}
      \item{\code{"maybeGrad"}:}{choosing method \code{"useGrad"} only when
	that seems \emph{reasonable}; otherwise using \code{"tolNorm2"}.}

%% FIXME say more
    }
  }
  \item{sval}{numeric vector of non-increasing singular values of
    \code{x}; typically unspecified and computed from \code{x} when
    needed, i.e., unless \code{method = "qr"}.}
  \item{warn.t}{logical indicating if \code{rankMatrix()} should warn
    when it needs \code{\link{t}(x)} instead of \code{x}.  Currently, for
    \code{method = "qr"} only, gives a warning by default because the
    caller often could have passed \code{t(x)} directly, more efficiently.}
  \item{warn.qr}{in the \eqn{QR} cases (i.e., if \code{method} starts with
    \code{"qr"}), \code{rankMatrix()} calls
    \code{qr2rankMarix(.., do.warn = warn.qr)}, see below.}

%% qr2rankMatrix():
  \item{qr}{an \R object resulting from \code{\link{qr}(x,..)}, i.e.,
    typically inheriting from \code{\link{class}} \code{"\link{qr}"} or
    \code{"\linkS4class{sparseQR}"}.}
  \item{isBqr}{\code{\link{logical}} indicating if \code{qr} is resulting
    from \pkg{base} \code{\link[base]{qr}()}.  (Otherwise, it is typically
    from \pkg{Matrix} package sparse \code{\link[Matrix]{qr}}.)}
  \item{do.warn}{logical; if true, warn about non-finite diagonal
    entries in the \eqn{R} matrix of the \eqn{QR} decomposition.
    Do not change lightly!}
}
\details{
  \code{qr2rankMatrix()} is typically called from \code{rankMatrix()} for
  the \code{"qr"}* \code{method}s, but can be used directly - much more
  efficiently in case the \code{qr}-decomposition is available anyway.
}
\note{For large sparse matrices \code{x}, unless you can specify
  \code{sval} yourself, currently \code{method = "qr"} may
  be the only feasible one, as the others need \code{sval} and call
  \code{\link{svd}()} which currently coerces \code{x} to a
  \code{\linkS4class{denseMatrix}} which may be very slow or impossible,
  depending on the matrix dimensions.

  Note that in the case of sparse \code{x}, \code{method = "qr"}, all
  non-strictly zero diagonal entries \eqn{d_i} where counted, up to
  including \pkg{Matrix} version 1.1-0, i.e., that method implicitly
  used \code{tol = 0}, see also the \code{set.seed(42)} example below.
}
\value{
  If \code{x} is a matrix of all \code{0} (or of zero dimension), the rank
  is zero; otherwise, typically a positive integer in \code{1:min(dim(x))}
  with attributes detailing the method used.

  There are rare cases where the sparse \eqn{QR} decomposition
  \dQuote{fails} in so far as the diagonal entries of \eqn{R}, the
  \eqn{d_i} (see above), end with non-finite, typically \code{\link{NaN}}
  entries.  Then, a warning is signalled (unless \code{warn.qr} /
  \code{do.warn} is not true) and \code{NA} (specifically,
  \code{\link{NA_integer_}}) is returned.
}
% \references{
% %% ~put references to the literature/web site here ~
% }
\author{Martin Maechler; for the "*Grad" methods building on
  suggestions by Ravi Varadhan.
}
\seealso{
 \code{\link{qr}}, \code{\link{svd}}.
}
\examples{
\dontshow{ % for R_DEFAULT_PACKAGES=NULL
library(stats, pos = "package:base", verbose = FALSE)
}
rankMatrix(cbind(1, 0, 1:3)) # 2

(meths <- eval(formals(rankMatrix)$method))

## a "border" case:
H12 <- Hilbert(12)
rankMatrix(H12, tol = 1e-20) # 12;  but  11  with default method & tol.
sapply(meths, function(.m.) rankMatrix(H12, method = .m.))
## tolNorm2   qr.R  qrLINPACK   qr  useGrad maybeGrad
##       11     11         12   12       11        11
## The meaning of 'tol' for method="qrLINPACK" and *dense* x is not entirely "scale free"
rMQL <- function(ex, M) rankMatrix(M, method="qrLINPACK",tol = 10^-ex)
rMQR <- function(ex, M) rankMatrix(M, method="qr.R",     tol = 10^-ex)
sapply(5:15, rMQL, M = H12) # result is platform dependent
##  7  7  8 10 10 11 11 11 12 12 12  {x86_64}
sapply(5:15, rMQL, M = 1000 * H12) # not identical unfortunately
##  7  7  8 10 11 11 12 12 12 12 12
sapply(5:15, rMQR, M = H12)
##  5  6  7  8  8  9  9 10 10 11 11
sapply(5:15, rMQR, M = 1000 * H12) # the *same*
\dontshow{
  (r12 <- sapply(5:15, rMQR, M = H12))
  stopifnot(identical(r12, sapply(5:15, rMQR, M = H12 / 100)),
            identical(r12, sapply(5:15, rMQR, M = H12 * 1e5)))

  rM1 <- function(ex, M) rankMatrix(M, tol = 10^-ex)
  (r12 <- sapply(5:15, rM1, M = H12))
  stopifnot(identical(r12, sapply(5:15, rM1, M = H12 / 100)),
            identical(r12, sapply(5:15, rM1, M = H12 * 1e5)))
}

## "sparse" case:
M15 <- kronecker(diag(x=c(100,1,10)), Hilbert(5))
sapply(meths, function(.m.) rankMatrix(M15, method = .m.))
#--> all 15, but 'useGrad' has 14.
sapply(meths, function(.m.) rankMatrix(M15, method = .m., tol = 1e-7)) # all 14

## "large" sparse
n <- 250000; p <- 33; nnz <- 10000
L <- sparseMatrix(i = sample.int(n, nnz, replace=TRUE),
                  j = sample.int(p, nnz, replace=TRUE),
                  x = rnorm(nnz))
(st1 <- system.time(r1 <- rankMatrix(L)))                # warning+ ~1.5 sec (2013)
(st2 <- system.time(r2 <- rankMatrix(L, method = "qr"))) # considerably faster!
r1[[1]] == print(r2[[1]]) ## -->  ( 33  TRUE )
\dontshow{
stopifnot(r1[[1]] == 33, 33 == r2[[1]])
if(interactive() || nzchar(Sys.getenv("R_MATRIX_CHECK_EXTRA")))
    stopifnot(st2[[1]] < 0.2) # seeing 0.03 (on ~ 2010-hardware; R 3.0.2)
}
## another sparse-"qr" one, which ``failed'' till 2013-11-23:
set.seed(42)
f1 <- factor(sample(50, 1000, replace=TRUE))
f2 <- factor(sample(50, 1000, replace=TRUE))
f3 <- factor(sample(50, 1000, replace=TRUE))
D <- t(do.call(rbind, lapply(list(f1,f2,f3), as, 'sparseMatrix')))
dim(D); nnzero(D) ## 1000 x 150 // 3000 non-zeros (= 2\%)
stopifnot(rankMatrix(D,           method='qr') == 148,
	  rankMatrix(crossprod(D),method='qr') == 148)

## zero matrix has rank 0 :
stopifnot(sapply(meths, function(.m.)
                        rankMatrix(matrix(0, 2, 2), method = .m.)) == 0)
}