1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126
|
# enumerate linear combinations
enumLC <- function(object, ...) UseMethod("enumLC")
#' @export
enumLC.default <- function(object, ...)
{
# there doesn't seem to be a reasonable default, so
# we'll throw an error
stop(paste('enumLC does not support ', class(object), 'objects'))
}
#' @export
enumLC.matrix <- function(object, ...)
{
# factor the matrix using QR decomposition and then process it
internalEnumLC(qr(object))
}
#' @export
enumLC.lm <- function(object, ...)
{
# extract the QR decomposition and the process it
internalEnumLC(object$qr)
}
#' @importFrom stats lm
#' @export
enumLC.formula <- function(object, ...)
{
# create an lm fit object from the formula, and then call
# appropriate enumLC method
enumLC(lm(object))
}
# this function does the actual work for all of the enumLC methods
internalEnumLC <- function(qrObj, ...)
{
R <- qr.R(qrObj) # extract R matrix
numColumns <- dim(R)[2] # number of columns in R
rank <- qrObj$rank # number of independent columns
pivot <- qrObj$pivot # get the pivot vector
if (is.null(numColumns) || rank == numColumns)
{
list() # there are no linear combinations
} else {
p1 <- 1:rank
X <- R[p1, p1] # extract the independent columns
Y <- R[p1, -p1, drop = FALSE] # extract the dependent columns
b <- qr(X) # factor the independent columns
b <- qr.coef(b, Y) # get regression coefficients of
# the dependent columns
b[abs(b) < 1e-6] <- 0 # zap small values
# generate a list with one element for each dependent column
lapply(1:dim(Y)[2],
function(i) c(pivot[rank + i], pivot[which(b[,i] != 0)]))
}
}
#' Determine linear combinations in a matrix
#'
#' Enumerate and resolve the linear combinations in a numeric matrix
#'
#' The QR decomposition is used to determine if the matrix is full rank and
#' then identify the sets of columns that are involved in the dependencies.
#'
#' To "resolve" them, columns are iteratively removed and the matrix rank is
#' rechecked.
#'
#' @param x a numeric matrix
#' @return a list with elements: \item{linearCombos }{If there are linear
#' combinations, this will be a list with elements for each dependency that
#' contains vectors of column numbers. } \item{remove }{a list of column
#' numbers that can be removed to counter the linear combinations}
#' @author Kirk Mettler and Jed Wing (\code{enumLC}) and Max Kuhn
#' (\code{findLinearCombos})
#' @keywords manip
#' @examples
#'
#' testData1 <- matrix(0, nrow=20, ncol=8)
#' testData1[,1] <- 1
#' testData1[,2] <- round(rnorm(20), 1)
#' testData1[,3] <- round(rnorm(20), 1)
#' testData1[,4] <- round(rnorm(20), 1)
#' testData1[,5] <- 0.5 * testData1[,2] - 0.25 * testData1[,3] - 0.25 * testData1[,4]
#' testData1[1:4,6] <- 1
#' testData1[5:10,7] <- 1
#' testData1[11:20,8] <- 1
#'
#' findLinearCombos(testData1)
#'
#' testData2 <- matrix(0, nrow=6, ncol=6)
#' testData2[,1] <- c(1, 1, 1, 1, 1, 1)
#' testData2[,2] <- c(1, 1, 1, 0, 0, 0)
#' testData2[,3] <- c(0, 0, 0, 1, 1, 1)
#' testData2[,4] <- c(1, 0, 0, 1, 0, 0)
#' testData2[,5] <- c(0, 1, 0, 0, 1, 0)
#' testData2[,6] <- c(0, 0, 1, 0, 0, 1)
#'
#' findLinearCombos(testData2)
#'
#' @export findLinearCombos
findLinearCombos <- function(x)
{
if(!is.matrix(x)) x <- as.matrix(x)
lcList <- enumLC(x)
initialList <- lcList
badList <- NULL
if(length(lcList) > 0)
{
continue <- TRUE
while(continue)
{
# keep removing linear dependencies until it resolves
tmp <- unlist(lapply(lcList, function(x) x[1]))
tmp <- unique(tmp[!is.na(tmp)])
badList <- unique(c(tmp, badList))
lcList <- enumLC(x[,-badList, drop = FALSE])
continue <- (length(lcList) > 0)
}
} else badList <- NULL
list(linearCombos = initialList, remove = badList)
}
|