File: stepcAIC.Rd

package info (click to toggle)
r-cran-caic4 1.0-1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, forky, sid, trixie
  • size: 356 kB
  • sloc: makefile: 2
file content (219 lines) | stat: -rw-r--r-- 7,447 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/stepcAIC.R
\name{stepcAIC}
\alias{stepcAIC}
\title{Function to stepwise select the (generalized) linear mixed model
fitted via (g)lmer() or (generalized) additive (mixed) model
fitted via gamm4() with the smallest cAIC.}
\usage{
stepcAIC(
  object,
  numberOfSavedModels = 1,
  groupCandidates = NULL,
  slopeCandidates = NULL,
  fixEfCandidates = NULL,
  numberOfPermissibleSlopes = 2,
  allowUseAcross = FALSE,
  allowCorrelationSel = FALSE,
  allowNoIntercept = FALSE,
  direction = "backward",
  trace = FALSE,
  steps = 50,
  keep = NULL,
  numCores = 1,
  data = NULL,
  returnResult = TRUE,
  calcNonOptimMod = TRUE,
  bsType = "tp",
  digits = 2,
  printValues = "caic",
  ...
)
}
\arguments{
\item{object}{object returned by \code{\link[lme4]{lmer}}, \code{\link[lme4]{glmer}} or 
\code{\link[gamm4]{gamm4}}}

\item{numberOfSavedModels}{integer defining how many of the best models are saved
during the step procedure. If \code{1} (DEFAULT), only the best model is returned. 
Any number \code{k} greater than \code{1} will return the \code{k} best models. 
If \code{0}, all models will be returned (not recommended for larger applications).}

\item{groupCandidates}{character vector containing names of possible grouping variables for 
new random effects. Group nesting must be specified manually, i.e. by 
listing up the string of the groups in the manner of lme4. For example 
\code{groupCandidates = c("a", "b", "a/b")}.}

\item{slopeCandidates}{character vector containing names of possible new random effects}

\item{fixEfCandidates}{character vector containing names of possible (non-)linear fixed effects 
in the GAMM; NULL for the (g)lmer-use case}

\item{numberOfPermissibleSlopes}{how many slopes are permissible for one grouping variable}

\item{allowUseAcross}{allow slopes to be used in other grouping variables}

\item{allowCorrelationSel}{logical; FALSE does not allow correlations of random effects 
to be (de-)selected (default)}

\item{allowNoIntercept}{logical; FALSE does not allow random effects without random intercept}

\item{direction}{character vector indicating the direction ("both","backward","forward")}

\item{trace}{logical; should information be printed during the execution of stepcAIC?}

\item{steps}{maximum number of steps to be considered}

\item{keep}{list with elements \code{fixed} and \code{random}, each a formula, giving the 
splines / fixed effects (\code{fixed}) or random effects (\code{random}) to be kept 
during selection; specified terms must be included in the original model}

\item{numCores}{the number of cores to be used in calculations; 
parallelization is done by using \code{parallel::mclapply}}

\item{data}{data.frame supplying the data used in \code{object}. \code{data} must also include 
variables, which are considered for forward updates.}

\item{returnResult}{logical; whether to return the result (best model and corresponding cAIC)}

\item{calcNonOptimMod}{logical; if FALSE, models which failed to converge are not considered 
for cAIC calculation}

\item{bsType}{type of splines to be used in forward gamm4 steps}

\item{digits}{number of digits used in printing the results}

\item{printValues}{what values of \code{c("cll", "df", "caic", "refit")} 
to print in the table of comparisons}

\item{...}{further options for cAIC call}
}
\value{
if \code{returnResult} is \code{TRUE}, a list with the best model \code{finalModel},
\code{additionalModels} if \code{numberOfSavedModels} was specified and
the corresponding cAIC \code{bestCAIC} is returned. 

Note that if \code{trace} is set to \code{FALSE} and \code{returnResult}
is also \code{FALSE}, the function call may not be meaningful, because
nothing is printed during execution and no result is returned.
}
\description{
The step function searches the space of possible models in a greedy manner,
where the direction of the search is specified by the argument
\code{direction}. If \code{direction = "forward"} / \code{direction = "backward"}, 
the function adds / excludes random effects until the cAIC can't be improved further.
In the case of forward-selection, either a new grouping structure, new
slopes for the random effects or new covariates modeled nonparametrically 
must be supplied to the function call.
If \code{direction = "both"}, the greedy search is alternating between forward
and backward steps, where the direction is changed after each step.
}
\section{Details}{
 

Note that the method cannot handle mixed models with uncorrelated random effects and does NOT
reduce models to such, i.e., the model with \code{(1 + s | g)} is either reduced to
\code{(1 | g)} or \code{(0 + s | g)} but not to \code{(1 + s || g)}.
}

\examples{

##### examples based on the Pastes data

# Model with random intercepts for sample and batch; the outer
# parentheses print the fitted object.
(fm3 <- lmer(strength ~ 1 + (1|sample) + (1|batch), Pastes))

# Backward search starting from fm3.
fm3_step <- stepcAIC(fm3, direction = "backward", trace = TRUE, data = Pastes)

# Intercept-only linear model used as the starting point for the
# forward / alternating searches below.
fm3_min <- lm(strength ~ 1, data=Pastes)

# Forward search over the candidate grouping variables.
fm3_min_step <- stepcAIC(fm3_min, groupCandidates = c("batch", "sample"), 
direction="forward", data=Pastes, trace=TRUE)
# Same candidates, alternating between forward and backward steps.
fm3_min_step <- stepcAIC(fm3_min, groupCandidates = c("batch", "sample"), 
direction="both", data=Pastes, trace=TRUE)
# try using a nested group effect which is actually not nested -> warning
fm3_min_step <- stepcAIC(fm3_min, groupCandidates = c("batch", "sample", "batch/sample"), 
                         direction="both", data=Pastes, trace=TRUE)

# Add a row index as an artificial covariate and use it as a random slope.
# seq_len(nrow(.)) is used instead of 1:dim(.)[1], which would yield
# c(1, 0) for a data set with zero rows.
Pastes$time <- seq_len(nrow(Pastes))
fm3_slope <- lmer(data=Pastes, strength ~ 1 + (1 + time | cask))

# Backward search on the random intercept + slope model.
fm3_slope_step <- stepcAIC(fm3_slope,direction="backward", trace=TRUE, data=Pastes)



# Refit the intercept-only model (Pastes now also contains time) and
# repeat the forward search.
fm3_min <- lm(strength ~ 1, data=Pastes)

fm3_min_step <- stepcAIC(fm3_min,groupCandidates=c("batch","sample"),
direction="forward", data=Pastes,trace=TRUE)



# Start from a model whose only random intercept is grouped by sample:batch.
fm3_inta <- lmer(strength ~ 1 + (1|sample:batch), data=Pastes)

fm3_inta_step <- stepcAIC(fm3_inta,groupCandidates=c("batch","sample"),
direction="forward", data=Pastes,trace=TRUE)

# Forward and alternating searches with cask as an additional candidate.
fm3_min_step2 <- stepcAIC(fm3_min,groupCandidates=c("cask","batch","sample"),
direction="forward", data=Pastes,trace=TRUE)

fm3_min_step3 <- stepcAIC(fm3_min,groupCandidates=c("cask","batch","sample"),
direction="both", data=Pastes,trace=TRUE)

\dontrun{
# Backward search on the sample:batch model (not run).
fm3_inta_step2 <- stepcAIC(fm3_inta,direction="backward", 
data=Pastes,trace=TRUE)
}

##### create own example

# Simulate n observations with two grouping variables a and b and their
# interaction c. All draws use n, so the sample size can be changed in
# one place without producing vectors of different lengths.
na <- 20
nb <- 25
n <- 400
a <- sample(1:na, n, replace=TRUE)
b <- factor(sample(1:nb, n, replace=TRUE))
x <- runif(n)
y <- 2 + 3 * x + a*.02 + rnorm(n) * .4
a <- factor(a)
c <- interaction(a,b)
# The levels of c are converted to numbers via their character labels
# and added to the response as a group-specific shift.
y <- y + as.numeric(as.character(c))*5
df <- data.frame(y=y,x=x,a=a,b=b,c=c)

# Linear model fitted without a data argument.
smallMod <- lm(y ~ x)

\dontrun{
# throw error
stepcAIC(smallMod, groupCandidates=c("a","b","c"), data=df, trace=TRUE, returnResult=FALSE)

# Refit with the data supplied explicitly.
smallMod <- lm(y ~ x, data=df)

# throw error
# NOTE(review): direction is not set here, so the default from the usage
# section applies -- presumably the reason for the error; confirm.
stepcAIC(smallMod, groupCandidates=c("a","b","c"), data=df, trace=TRUE, returnResult=FALSE)

# get it all right
mod <- stepcAIC(smallMod, groupCandidates=c("a","b","c"), 
                data=df, trace=TRUE, 
                direction="forward", returnResult=TRUE)

# make some more steps...
stepcAIC(smallMod, groupCandidates=c("a","b","c"), data=df, trace=TRUE, 
         direction="both", returnResult=FALSE)

# Start from a mixed model that already has a random intercept for a.
mod1 <- lmer(y ~ x + (1|a), data=df)

stepcAIC(mod1, groupCandidates=c("b","c"), data=df, trace=TRUE, direction="forward")
stepcAIC(mod1, groupCandidates=c("b","c"), data=df, trace=TRUE, direction="both")



# Backward searches on models with two random intercepts.
mod2 <- lmer(y ~ x + (1|a) + (1|c), data=df)

stepcAIC(mod2, data=df, trace=TRUE, direction="backward")

mod3 <- lmer(y ~ x + (1|a) + (1|a:b), data=df)

stepcAIC(mod3, data=df, trace=TRUE, direction="backward")

}

}
\author{
David Ruegamer
}