File: irmi.Rd

package info (click to toggle)
r-cran-vim 6.2.2%2Bdfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, forky, sid, trixie
  • size: 1,556 kB
  • sloc: cpp: 141; sh: 12; makefile: 2
file content (170 lines) | stat: -rw-r--r-- 4,782 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/irmi.R
\name{irmi}
\alias{irmi}
\title{Iterative robust model-based imputation (IRMI)}
\usage{
irmi(
  x,
  eps = 5,
  maxit = 100,
  mixed = NULL,
  mixed.constant = NULL,
  count = NULL,
  step = FALSE,
  robust = FALSE,
  takeAll = TRUE,
  noise = TRUE,
  noise.factor = 1,
  force = FALSE,
  robMethod = "MM",
  force.mixed = TRUE,
  mi = 1,
  addMixedFactors = FALSE,
  trace = FALSE,
  init.method = "kNN",
  modelFormulas = NULL,
  multinom.method = "multinom",
  imp_var = TRUE,
  imp_suffix = "imp"
)
}
\arguments{
\item{x}{data.frame or matrix}

\item{eps}{threshold for convergence}

\item{maxit}{maximum number of iterations}

\item{mixed}{column index of the semi-continuous variables}

\item{mixed.constant}{vector with length equal to the number of
semi-continuous variables specifying the point of the semi-continuous
distribution with non-zero probability}

\item{count}{column index of count variables}

\item{step}{a stepwise model selection is applied when the parameter is set
to TRUE}

\item{robust}{if TRUE, robust regression methods will be applied}

\item{takeAll}{takes information of (initialised) missings in the response
as well for regression imputation.}

\item{noise}{irmi has the option to add a random error term to the imputed
values; this creates the possibility for multiple imputation. The error term
has mean 0 and variance corresponding to the variance of the regression
residuals.}

\item{noise.factor}{amount of noise.}

\item{force}{if TRUE, the algorithm tries to find a solution in any case,
possibly by using different robust methods automatically.}

\item{robMethod}{regression method when the response is continuous.}

\item{force.mixed}{if TRUE, the algorithm tries to find a solution in any
case, possibly by using different robust methods automatically.}

\item{mi}{number of multiple imputations.}

\item{addMixedFactors}{if TRUE add additional factor variable for each
mixed variable as X variable in the regression}

\item{trace}{Additional information about the iterations when trace equals
TRUE.}

\item{init.method}{Method for initialization of missing values (kNN or
median)}

\item{modelFormulas}{a named list with the names of the variables for the rhs
of the formulas, which must contain a rhs formula for each variable with
missing values. It should look like \code{list(y1=c("x1","x2"),y2=c("x1","x3"))}
if factor variables for the mixed variables should be created for the
regression models}

\item{multinom.method}{Method for estimating the multinomial models
(current default and only available method is multinom)}

\item{imp_var}{TRUE/FALSE if a TRUE/FALSE variable for each imputed
variable should be created to show the imputation status}

\item{imp_suffix}{suffix for the TRUE/FALSE variables showing the imputation
status}
}
\value{
the imputed data set.
}
\description{
In each step of the iteration, one variable is used as a response variable
and the remaining variables serve as the regressors.
}
\details{
The method works sequentially and iteratively. The method can deal with a
mixture of continuous, semi-continuous, ordinal and nominal variables
including outliers.

A full description of the method can be found in the mentioned reference.
}
\examples{

data(sleep)
irmi(sleep)

data(testdata)
imp_testdata1 <- irmi(testdata$wna, mixed = testdata$mixed)

# mixed.constant != 0 (-10)
testdata$wna$m1[testdata$wna$m1 == 0] <- -10
testdata$wna$m2 <- log(testdata$wna$m2 + 0.001)
imp_testdata2 <- irmi(
  testdata$wna,
  mixed = testdata$mixed,
  mixed.constant = c(-10,log(0.001))
)
imp_testdata2$m2 <- exp(imp_testdata2$m2) - 0.001

# example with fixed formulas for the variables with missing values
form = list(
  NonD  = c("BodyWgt", "BrainWgt"),
  Dream = c("BodyWgt", "BrainWgt"),
  Sleep = c("BrainWgt"           ),
  Span  = c("BodyWgt"            ),
  Gest  = c("BodyWgt", "BrainWgt")
)
irmi(sleep, modelFormulas = form, trace = TRUE)

# Example with ordered variable
td <- testdata$wna
td$c1 <- as.ordered(td$c1)
irmi(td)

}
\references{
M. Templ, A. Kowarik, P. Filzmoser (2011) Iterative stepwise
regression imputation using standard and robust methods.  \emph{Computational
Statistics and Data Analysis}, Vol. 55, pp. 2793-2806.

A. Kowarik, M. Templ (2016) Imputation with
R package VIM.  \emph{Journal of
Statistical Software}, 74(7), 1-16.
}
\seealso{
\code{\link[mi:04mi]{mi::mi()}}

Other imputation methods: 
\code{\link{hotdeck}()},
\code{\link{impPCA}()},
\code{\link{kNN}()},
\code{\link{matchImpute}()},
\code{\link{medianSamp}()},
\code{\link{rangerImpute}()},
\code{\link{regressionImp}()},
\code{\link{sampleCat}()}
}
\author{
Matthias Templ, Alexander Kowarik
}
\concept{imputation methods}
\keyword{manip}