1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179
|
#' @export
icr <- function (x, ...) UseMethod("icr")
#' Independent Component Regression
#'
#' Fit a linear regression model using independent components
#'
#' This produces a model analogous to Principal Components Regression (PCR) but
#' uses Independent Component Analysis (ICA) to produce the scores. The user
#' must specify a value of \code{n.comp} to pass to
#' \code{\link[fastICA]{fastICA}}.
#'
#' The function \code{\link{preProcess}} to produce the ICA scores for the
#' original data and for \code{newdata}.
#'
#' @aliases icr.formula icr.default icr predict.icr
#' @param formula A formula of the form \code{class ~ x1 + x2 + \dots{}}
#' @param data Data frame from which variables specified in \code{formula} are
#' preferentially to be taken.
#' @param weights (case) weights for each example - if missing defaults to 1.
#' @param subset An index vector specifying the cases to be used in the
#' training sample. (NOTE: If given, this argument must be named.)
#' @param na.action A function to specify the action to be taken if \code{NA}s
#' are found. The default action is for the procedure to fail. An alternative
#' is na.omit, which leads to rejection of cases with missing values on any
#' required variable. (NOTE: If given, this argument must be named.)
#' @param contrasts a list of contrasts to be used for some or all of the
#' factors appearing as variables in the model formula.
#' @param \dots arguments passed to \code{\link[fastICA]{fastICA}}
#' @param x matrix or data frame of \code{x} values for examples.
#' @param y matrix or data frame of target values for examples.
#' @param object an object of class \code{icr} as returned by \code{icr}.
#' @param newdata matrix or data frame of test examples.
#' @return For \code{icr}, a list with elements \item{model }{the results of
#' \code{\link[stats]{lm}} after the ICA transformation} \item{ica
#' }{pre-processing information} \item{n.comp }{number of ICA components}
#' \item{names }{column names of the original data}
#' @author Max Kuhn
#' @seealso \code{\link[fastICA]{fastICA}}, \code{\link{preProcess}},
#' \code{\link[stats]{lm}}
#' @keywords multivariate
#' @examples
#'
#' data(BloodBrain)
#'
#' icrFit <- icr(bbbDescr, logBBB, n.comp = 5)
#'
#' icrFit
#'
#' predict(icrFit, bbbDescr[1:5,])
#' @method icr formula
#' @export
icr.formula <- function (formula, data, weights, ...,
subset, na.action, contrasts = NULL)
{
m <- match.call(expand.dots = FALSE)
if (is.matrix(eval.parent(m$data)))
m$data <- as.data.frame(data, stringsAsFactors = TRUE)
m$... <- m$contrasts <- NULL
m[[1]] <- as.name("model.frame")
m <- eval.parent(m)
Terms <- attr(m, "terms")
x <- model.matrix(Terms, m, contrasts)
cons <- attr(x, "contrast")
xint <- match("(Intercept)", colnames(x), nomatch = 0)
if (xint > 0)
x <- x[, -xint, drop = FALSE]
w <- model.weights(m)
if (length(w) == 0)
w <- rep(1, nrow(x))
y <- model.response(m)
res <- icr.default(x, y, weights = w, thresh = thresh, ...)
res$terms <- Terms
res$coefnames <- colnames(x)
res$na.action <- attr(m, "na.action")
res$contrasts <- cons
res$xlevels <- .getXlevels(Terms, m)
class(res) <- c("icr.formula", "icr")
res
}
#' @rdname icr.formula
#' @method icr default
#' @importFrom stats predict lm
#' @export
icr.default <- function(x, y, ...)
{
xNames <- colnames(x)
pp <- preProcess(x, "ica", ...)
x <- predict(pp, x)
if(is.factor(y)) stop("y must be numeric")
data <- if(is.data.frame(x)) x else as.data.frame(x, stringsAsFactors = TRUE)
data$y <- y
modelFit <- lm(y ~ ., data = data)
out <- list(model = modelFit,
ica = pp,
dim = dim(x),
n.comp = list(...)$n.comp,
names = xNames)
class(out) <- "icr"
out
}
#' @importFrom stats4 coef
#' @export
print.icr <- function (x, digits = max(3, getOption("digits") - 3), ...)
{
cat("Independent Component Regression\n\n")
cat("Created from", x$dim[1], "samples and", x$dim[2], "variables\n\n")
if (length(coef(x$model))) {
cat("Coefficients:\n")
print.default(
format(
coef(x$model),
digits = digits),
print.gap = 2,
quote = FALSE)
}
else cat("No coefficients\n")
cat("\n")
invisible(x)
}
#' @rdname icr.formula
#' @method predict icr
#' @importFrom stats .checkMFClasses delete.response model.frame model.matrix predict na.omit fitted
#' @export
predict.icr <- function(object, newdata, ...)
{
loadNamespace("fastICA")
if (!inherits(object, "icr")) stop("object not of class \"icr\"")
if (missing(newdata))
{
return(fitted(object$model))
} else {
if (inherits(object, "icr.formula")) {
newdata <- as.data.frame(newdata, stringsAsFactors = TRUE)
rn <- row.names(newdata)
Terms <- delete.response(object$terms)
m <- model.frame(Terms, newdata, na.action = na.omit,
xlev = object$xlevels)
if (!is.null(cl <- attr(Terms, "dataClasses")))
.checkMFClasses(cl, m)
keep <- match(row.names(m), rn)
x <- model.matrix(Terms, m, contrasts = object$contrasts)
xint <- match("(Intercept)", colnames(x), nomatch = 0)
if (xint > 0)
x <- x[, -xint, drop = FALSE]
}
else {
if (is.null(dim(newdata)))
dim(newdata) <- c(1, length(newdata))
x <- as.matrix(newdata)
if (any(is.na(x)))
stop("missing values in 'x'")
keep <- 1:nrow(x)
rn <- rownames(x)
}
}
if(!is.null(object$names))
{
x <- x[, object$names, drop = FALSE]
}
if(!is.data.frame(x)) x <- as.data.frame(x, stringsAsFactors = TRUE)
x <- predict(object$ica, x)
predict(object$model, x, ...)
}
|