File: impPCA.Rd

package info (click to toggle)
r-cran-vim 6.2.2%2Bdfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, forky, sid, trixie
  • size: 1,556 kB
  • sloc: cpp: 141; sh: 12; makefile: 2
file content (100 lines) | stat: -rw-r--r-- 3,292 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/impPCA.R
\name{impPCA}
\alias{impPCA}
\title{Iterative EM PCA imputation}
\usage{
impPCA(
  x,
  method = "classical",
  m = 1,
  eps = 0.5,
  k = ncol(x) - 1,
  maxit = 100,
  boot = FALSE,
  verbose = TRUE
)
}
\arguments{
\item{x}{data.frame or matrix}

\item{method}{\code{"classical"} or \code{"mcd"} (robust estimation)}

\item{m}{number of multiple imputations (only if parameter \code{boot} equals \code{TRUE})}

\item{eps}{threshold for convergence}

\item{k}{number of principal components for reconstruction of \code{x}}

\item{maxit}{maximum number of iterations}

\item{boot}{logical; if \code{TRUE}, a residual bootstrap is performed}

\item{verbose}{logical; if \code{TRUE}, additional information about the
imputation process is printed}
}
\value{
The imputed data set. If \code{boot = FALSE}, this is a data.frame.
If \code{boot = TRUE}, this is a list where each list element contains a data.frame.
}
\description{
Imputes missing values with a greedy iterative EM-PCA algorithm; robust
estimation is available via \code{method = "mcd"}.
}
\examples{

# Example data: the Animals data set from MASS, analysed on the log scale.
data(Animals, package = "MASS")
# Tiny perturbation of one brain value, kept from the original example
# (presumably to break an exact tie; TODO confirm).
Animals$brain[19] <- Animals$brain[19] + 0.01
Animals <- log(Animals)
colnames(Animals) <- c("log(body)", "log(brain)")

# Set 10 values of log(brain) to missing. Every row is equally likely to be
# selected, except rows 6, 16 and 26, which are never set to missing.
Animals_na <- Animals
probs <- rep(0.5, nrow(Animals))
probs[c(6, 16, 26)] <- 0
set.seed(1234)
Animals_na[sample(seq_len(nrow(Animals)), 10, prob = probs), "log(brain)"] <- NA
# TRUE for the cells that have to be imputed
w <- is.na(Animals_na$`log(brain)`)

# Single imputation with the classical and the robust (mcd) method.
impPCA(Animals_na)
impPCA(Animals_na, method = "mcd")
# With boot = TRUE the result is a list of imputed data sets.
impPCA(Animals_na, boot = TRUE, m = 10)
impPCA(Animals_na, method = "mcd", boot = TRUE)[[1]]

# Plot the observed values, the values set to missing and their imputations.
plot(`log(brain)` ~ `log(body)`, data = Animals, type = "n", ylab = "", xlab = "")
mtext(text = "impPCA robust", side = 3)
points(Animals$`log(body)`[!w], Animals$`log(brain)`[!w])
points(Animals$`log(body)`[w], Animals$`log(brain)`[w], col = "grey", pch = 17)
imputed <- impPCA(Animals_na, method = "mcd", boot = TRUE)[[1]]
colnames(imputed) <- c("log(body)", "log(brain)")
points(imputed$`log(body)`[w], imputed$`log(brain)`[w], col = "red", pch = 20, cex = 1.4)
# Dashed lines connect each true value with its imputation.
segments(x0 = Animals$`log(body)`[w], x1 = imputed$`log(body)`[w],
         y0 = Animals$`log(brain)`[w], y1 = imputed$`log(brain)`[w],
         lty = 2, col = "grey")
legend("topleft", legend = c("non-missings", "set to missing", "imputed values"),
       pch = c(1, 17, 20), col = c("black", "grey", "red"), cex = 0.7)

# Imputation error, evaluated only on the cells that were set to missing.
truth <- Animals$`log(brain)`[w]
err <- truth - imputed$`log(brain)`[w]
# Mean absolute percentage error.
mape <- round(100 * mean(abs(err / truth)), 2)
# Root mean squared error, normalised by the variance of the complete
# variable (errors are squared before averaging).
nrmse <- round(sqrt(mean(err^2) / var(Animals$`log(brain)`)), 2)
text(x = 8, y = 1.5, labels = paste("MAPE =", mape))
text(x = 8, y = 0.5, labels = paste("NRMSE =", nrmse))

}
\references{
Serneels, Sven and Verdonck, Tim (2008).
Principal component analysis for data containing outliers and missing elements.
Computational Statistics and Data Analysis, Elsevier, vol. 52(3), pages 1712-1727
}
\seealso{
Other imputation methods: 
\code{\link{hotdeck}()},
\code{\link{irmi}()},
\code{\link{kNN}()},
\code{\link{matchImpute}()},
\code{\link{medianSamp}()},
\code{\link{rangerImpute}()},
\code{\link{regressionImp}()},
\code{\link{sampleCat}()}
}
\author{
Matthias Templ
}
\concept{imputation methods}
\keyword{manip}