1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109
|
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/hotdeck.R
\name{hotdeck}
\alias{hotdeck}
\title{Hot-Deck Imputation}
\usage{
hotdeck(
data,
variable = NULL,
ord_var = NULL,
domain_var = NULL,
makeNA = NULL,
NAcond = NULL,
impNA = TRUE,
donorcond = NULL,
imp_var = TRUE,
imp_suffix = "imp"
)
}
\arguments{
\item{data}{data.frame or matrix}
\item{variable}{variables where missing values should be imputed (not overlapping with ord_var)}
\item{ord_var}{variables for sorting the data set before imputation (not overlapping with variable)}
\item{domain_var}{variables for building domains and impute within these
domains}
\item{makeNA}{list of length equal to the number of variables, with values that should be converted to NA for each variable}
\item{NAcond}{list of length equal to the number of variables, with a condition for imputing a NA}
\item{impNA}{TRUE/FALSE whether NA should be imputed}
\item{donorcond}{list of length equal to the number of variables, with a donor condition as character string,
e.g. ">5" or c(">5","<10"). If the list element for a variable is NULL no condition will be applied for this variable.}
\item{imp_var}{TRUE/FALSE whether a TRUE/FALSE variable should be created for
each imputed variable to show the imputation status}
\item{imp_suffix}{suffix for the TRUE/FALSE variables showing the imputation
status}
}
\value{
the imputed data set.
}
\description{
Implementation of the popular Sequential, Random (within a domain) hot-deck
algorithm for imputation.
}
\note{
If the sequential hotdeck does not lead to a suitable donor,
a random donor in the group will be used.
}
\examples{
data(sleep)

# Simplest call: only the data set is supplied, all other arguments keep
# their defaults
sleepI <- hotdeck(sleep)

# Sort by BodyWgt before imputation and build the domains from Pred
sleepI2 <- hotdeck(sleep, ord_var = "BodyWgt", domain_var = "Pred")

# Usage of donorcond in a simple example:
# one condition string is given for each of the five variables to impute
sleepI3 <- hotdeck(
  sleep,
  variable = c("NonD", "Dream", "Sleep", "Span", "Gest"),
  ord_var = "BodyWgt", domain_var = "Pred",
  donorcond = list(">4", "<17", ">1.5", "\%between\%c(8,13)", ">5")
)

set.seed(132)
nRows <- 1e3

# Generate a data set with nRows rows and several variables.
# Every column is drawn with nRows values; a shorter vector would be
# silently recycled by data.frame() and yield a repeating pattern.
x <- data.frame(
  x = rnorm(nRows), y = rnorm(nRows),
  z = sample(LETTERS, nRows, replace = TRUE),
  d1 = sample(LETTERS[1:3], nRows, replace = TRUE),
  d2 = sample(LETTERS[1:2], nRows, replace = TRUE),
  o1 = rnorm(nRows), o2 = rnorm(nRows), o3 = rnorm(nRows)
)

# Keep an untouched copy of the complete data
origX <- x

# Set nRows/10 randomly chosen rows to NA in each of the first four
# columns (x, y, z and d1)
x[sample(1:nRows, nRows / 10), 1] <- NA
x[sample(1:nRows, nRows / 10), 2] <- NA
x[sample(1:nRows, nRows / 10), 3] <- NA
x[sample(1:nRows, nRows / 10), 4] <- NA

# Impute using three ordering variables within the domains given by d2
xImp <- hotdeck(x, ord_var = c("o1", "o2", "o3"), domain_var = "d2")
}
\references{
A. Kowarik, M. Templ (2016) Imputation with
R package VIM. \emph{Journal of
Statistical Software}, 74(7), 1-16.
}
\seealso{
Other imputation methods:
\code{\link{impPCA}()},
\code{\link{irmi}()},
\code{\link{kNN}()},
\code{\link{matchImpute}()},
\code{\link{medianSamp}()},
\code{\link{rangerImpute}()},
\code{\link{regressionImp}()},
\code{\link{sampleCat}()}
}
\author{
Alexander Kowarik
}
\concept{imputation methods}
\keyword{manip}
|