% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/impPCA.R
\name{impPCA}
\alias{impPCA}
\title{Iterative EM PCA imputation}
\usage{
impPCA(
x,
method = "classical",
m = 1,
eps = 0.5,
k = ncol(x) - 1,
maxit = 100,
boot = FALSE,
verbose = TRUE
)
}
\arguments{
\item{x}{data.frame or matrix}
\item{method}{\code{"classical"} or \code{"mcd"} (robust estimation)}
\item{m}{number of multiple imputations (only if parameter \code{boot} equals \code{TRUE})}
\item{eps}{threshold for convergence}
\item{k}{number of principal components for reconstruction of \code{x}}
\item{maxit}{maximum number of iterations}
\item{boot}{residual bootstrap (if \code{TRUE})}
\item{verbose}{logical; if \code{TRUE}, additional information about the
imputation process is printed}
}
\value{
The imputed data set. If \code{boot = FALSE}, this is a data.frame.
If \code{boot = TRUE}, this is a list in which each element is a data.frame.
}
\description{
Greedy algorithm for EM-PCA including robust methods
}
\examples{
# Prepare a two-column data set on the log scale.
data(Animals, package = "MASS")
Animals$brain[19] <- Animals$brain[19] + 0.01
Animals <- log(Animals)
colnames(Animals) <- c("log(body)", "log(brain)")
Animals_na <- Animals

# Rows 6, 16 and 26 get selection probability 0 and are never set to missing;
# all other rows are equally likely to be chosen.
probs <- rep(0.5, nrow(Animals))
probs[c(6, 16, 26)] <- 0
set.seed(1234)
Animals_na[sample(1:nrow(Animals), 10, prob = probs), "log(brain)"] <- NA
# w flags the rows whose log(brain) value was set to missing.
w <- is.na(Animals_na$`log(brain)`)

# Single imputation, classical and robust (MCD based).
impPCA(Animals_na)
impPCA(Animals_na, method = "mcd")
# With boot = TRUE a list of imputed data sets is returned.
impPCA(Animals_na, boot = TRUE, m = 10)
impPCA(Animals_na, method = "mcd", boot = TRUE)[[1]]

# Plot observed values, the values that were set to missing and their imputations.
plot(`log(brain)` ~ `log(body)`, data = Animals, type = "n", ylab = "", xlab = "")
mtext(text = "impPCA robust", side = 3)
points(Animals$`log(body)`[!w], Animals$`log(brain)`[!w])
points(Animals$`log(body)`[w], Animals$`log(brain)`[w], col = "grey", pch = 17)
imputed <- impPCA(Animals_na, method = "mcd", boot = TRUE)[[1]]
colnames(imputed) <- c("log(body)", "log(brain)")
points(imputed$`log(body)`[w], imputed$`log(brain)`[w], col = "red", pch = 20, cex = 1.4)
# Dashed segments connect each true value with its imputed value.
segments(x0 = Animals$`log(body)`[w], x1 = imputed$`log(body)`[w],
         y0 = Animals$`log(brain)`[w], y1 = imputed$`log(brain)`[w],
         lty = 2, col = "grey")
legend("topleft", legend = c("non-missings", "set to missing", "imputed values"),
       pch = c(1, 17, 20), col = c("black", "grey", "red"), cex = 0.7)

# Error measures, computed on the cells that were set to missing.
err <- Animals$`log(brain)`[w] - imputed$`log(brain)`[w]
# Mean absolute percentage error.
mape <- round(100 * mean(abs(err / Animals$`log(brain)`[w])), 2)
# Root mean squared error, normalised by the variance of the complete variable.
s2 <- var(Animals$`log(brain)`)
nrmse <- round(sqrt(mean(err^2) / s2), 2)
text(x = 8, y = 1.5, labels = paste("MAPE =", mape))
text(x = 8, y = 0.5, labels = paste("NRMSE =", nrmse))
}
\references{
Serneels, Sven and Verdonck, Tim (2008).
Principal component analysis for data containing outliers and missing elements.
Computational Statistics and Data Analysis, Elsevier, vol. 52(3), pages 1712-1727
}
\seealso{
Other imputation methods:
\code{\link{hotdeck}()},
\code{\link{irmi}()},
\code{\link{kNN}()},
\code{\link{matchImpute}()},
\code{\link{medianSamp}()},
\code{\link{rangerImpute}()},
\code{\link{regressionImp}()},
\code{\link{sampleCat}()}
}
\author{
Matthias Templ
}
\concept{imputation methods}
\keyword{manip}