File: hotdeck.Rd

package info (click to toggle)
r-cran-vim 6.2.2%2Bdfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, forky, sid, trixie
  • size: 1,556 kB
  • sloc: cpp: 141; sh: 12; makefile: 2
file content (109 lines) | stat: -rw-r--r-- 3,045 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/hotdeck.R
\name{hotdeck}
\alias{hotdeck}
\title{Hot-Deck Imputation}
\usage{
hotdeck(
  data,
  variable = NULL,
  ord_var = NULL,
  domain_var = NULL,
  makeNA = NULL,
  NAcond = NULL,
  impNA = TRUE,
  donorcond = NULL,
  imp_var = TRUE,
  imp_suffix = "imp"
)
}
\arguments{
\item{data}{data.frame or matrix}

\item{variable}{variables where missing values should be imputed (not overlapping with ord_var)}

\item{ord_var}{variables for sorting the data set before imputation (not overlapping with variable)}

\item{domain_var}{variables for building domains; imputation is carried out
within these domains}

\item{makeNA}{list of length equal to the number of variables, with values, that should be converted to NA for each variable}

\item{NAcond}{list of length equal to the number of variables, with a condition for imputing a NA}

\item{impNA}{TRUE/FALSE whether NA should be imputed}

\item{donorcond}{list of length equal to the number of variables, with a donor condition as character string,
e.g. ">5" or c(">5","<10"). If the list element for a variable is NULL no condition will be applied for this variable.}

\item{imp_var}{TRUE/FALSE if a TRUE/FALSE variable for each imputed
variable should be created to show the imputation status}

\item{imp_suffix}{suffix for the TRUE/FALSE variables showing the imputation
status}
}
\value{
the imputed data set.
}
\description{
Implementation of the popular Sequential, Random (within a domain) hot-deck
algorithm for imputation.
}
\note{
If the sequential hotdeck does not lead to a suitable donor,
a random donor in the group will be used.
}
\examples{

# Load the example data set used in the calls below
data(sleep)

# Impute with every optional argument left at its default
sleepI <- hotdeck(sleep)

# Sort by BodyWgt and impute within the domains defined by Pred
sleepI2 <- hotdeck(sleep, ord_var = "BodyWgt", domain_var = "Pred")

# Usage of donorcond in a simple example:
# one condition string is supplied per variable listed in `variable`
sleepI3 <- hotdeck(
  sleep,
  variable = c("NonD", "Dream", "Sleep", "Span", "Gest"),
  ord_var = "BodyWgt", domain_var = "Pred",
  donorcond = list(">4", "<17", ">1.5", "\%between\%c(8,13)", ">5")
)

set.seed(132)
nRows <- 1e3
# Generate a data set with nRows rows and several variables.
# Every column is sized by nRows, so no column relies on silent recycling
# and the example keeps working when nRows is changed.
x <- data.frame(
  x = rnorm(nRows), y = rnorm(nRows),
  z = sample(LETTERS, nRows, replace = TRUE),
  d1 = sample(LETTERS[1:3], nRows, replace = TRUE),
  d2 = sample(LETTERS[1:2], nRows, replace = TRUE),
  o1 = rnorm(nRows), o2 = rnorm(nRows), o3 = rnorm(nRows)
)
# Keep a copy of the complete data before values are removed
origX <- x
# Set a random tenth of the rows to NA in each of the first four columns
x[sample(seq_len(nRows), nRows / 10), 1] <- NA
x[sample(seq_len(nRows), nRows / 10), 2] <- NA
x[sample(seq_len(nRows), nRows / 10), 3] <- NA
x[sample(seq_len(nRows), nRows / 10), 4] <- NA
# Impute using three ordering variables within the domains given by d2
xImp <- hotdeck(x, ord_var = c("o1", "o2", "o3"), domain_var = "d2")


}
\references{
A. Kowarik, M. Templ (2016) Imputation with
R package VIM.  \emph{Journal of
Statistical Software}, 74(7), 1-16.
}
\seealso{
Other imputation methods: 
\code{\link{impPCA}()},
\code{\link{irmi}()},
\code{\link{kNN}()},
\code{\link{matchImpute}()},
\code{\link{medianSamp}()},
\code{\link{rangerImpute}()},
\code{\link{regressionImp}()},
\code{\link{sampleCat}()}
}
\author{
Alexander Kowarik
}
\concept{imputation methods}
\keyword{manip}