1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109
|
\name{lm.fit.sparse}
\alias{lm.fit.sparse}
\title{Fitter Function for Sparse Linear Models}
\description{
A basic computing engine for sparse linear least squares regression.
Note that the exact interface (arguments, return value) currently is
\bold{experimental}, and is bound to change. Use at your own risk!
}
\usage{
lm.fit.sparse(x, y, w = NULL, offset = NULL,
method = c("qr", "cholesky"),
tol = 1e-7, singular.ok = TRUE, order = NULL,
transpose = FALSE)
}
\arguments{
\item{x}{\emph{sparse} design matrix of dimension \code{n * p}, i.e.,
an \R object of a \code{\link{class}} extending
\code{\linkS4class{dsparseMatrix}}; typically the result of
\code{\link{sparse.model.matrix}}.}
\item{y}{vector of observations of length \code{n}, or a matrix with
\code{n} rows.}
\item{w}{vector of weights (length \code{n}) to be used in the fitting
process. Weighted least squares is used with weights \code{w},
i.e., \code{sum(w * e^2)} is minimized.
\bold{Not yet implemented !}
}
\item{offset}{numeric of length \code{n}). This can be used to
specify an \emph{a priori} known component to be included in the
linear predictor during fitting.}
\item{method}{a character string specifying the (factorization)
method. Currently, \code{"qr"} or \code{"cholesky"}.}
\item{tol}{[for back-compatibility only; unused:] tolerance for the
\code{\link{qr}} decomposition. Default is 1e-7.}
\item{singular.ok}{[for back-compatibility only; unused:] logical. If
\code{FALSE}, a singular model is an error.}
\item{order}{integer or \code{NULL}, for \code{method == "qr"}, will
determine how the fill-reducing ordering (aka permutation) for the
\dQuote{symbolic} part is determined (in \code{cs_amd()}), with the
options {0: natural}, {1: Chol}, {2: LU}, and {3: QR}, where
\code{3} is the default.}
\item{transpose}{
logical; if true, use the transposed matrix \code{t(x)} instead of
\code{x}.
}
}
% \details{
% %% ~~ If necessary, more details than the description above ~~
% }
\value{
Either a single numeric vector or a list of four numeric vectors.
}
\seealso{
\code{\link{glm4}} is an alternative (much) more general fitting
function.
\code{\link{sparse.model.matrix}} from the \pkg{Matrix} package;
the non-sparse function in standard \R's package \pkg{stats}:
\code{\link{lm.fit}()}.
}
\examples{
dd <- expand.grid(a = as.factor(1:3),
b = as.factor(1:4),
c = as.factor(1:2),
d= as.factor(1:8))
n <- nrow(dd <- dd[rep(seq_len(nrow(dd)), each = 10), ])
set.seed(17)
dM <- cbind(dd, x = round(rnorm(n), 1))
## randomly drop some
n <- nrow(dM <- dM[- sample(n, 50),])
dM <- within(dM, { A <- c(2,5,10)[a]
B <- c(-10,-1, 3:4)[b]
C <- c(-8,8)[c]
D <- c(10*(-5:-2), 20*c(0, 3:5))[d]
Y <- A + B + A*B + C + D + A*D + C*x + rnorm(n)/10
wts <- sample(1:10, n, replace=TRUE)
rm(A,B,C,D)
})
str(dM) # 1870 x 7
X <- Matrix::sparse.model.matrix( ~ (a+b+c+d)^2 + c*x, data = dM)
dim(X) # 1870 x 69
X[1:10, 1:20]
## For now, use 'MatrixModels:::' --- TODO : export once interface is clear!
Xd <- as(X,"matrix")
system.time(fmDense <- lm.fit(Xd, y = dM[,"Y"]))
system.time( r1 <- MatrixModels:::lm.fit.sparse(X, y = dM[,"Y"]) ) # *is* faster
stopifnot(all.equal(r1, unname(fmDense$coeff), tolerance = 1e-12))
system.time(
r2 <- MatrixModels:::lm.fit.sparse(X, y = dM[,"Y"], method = "chol") )
stopifnot(all.equal(r1, r2$coef, tolerance = 1e-12),
all.equal(fmDense$residuals, r2$residuals, tolerance=1e-9)
)
## with weights:
system.time(fmD.w <- with(dM, lm.wfit(Xd, Y, w = wts)))
system.time(fm.w1 <- with(dM, MatrixModels:::lm.fit.sparse(X, Y, w = wts)))
system.time(fm.w2 <- with(dM, MatrixModels:::lm.fit.sparse(X, Y, w = wts,
method = "chol") ))
stopifnot(all.equal(fm.w1, unname(fmD.w$coeff), tolerance = 1e-12),
all.equal(fm.w2$coef, fm.w1, tolerance = 1e-12),
all.equal(fmD.w$residuals, fm.w2$residuals, tolerance=1e-9)
)
}
\keyword{regression}
\keyword{array}
|