File: lmrob.control.Rd

package info (click to toggle)
robustbase 0.99-4-1-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 4,552 kB
  • sloc: fortran: 3,245; ansic: 3,243; sh: 15; makefile: 2
file content (344 lines) | stat: -rw-r--r-- 16,761 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
\name{lmrob.control}
\title{Tuning Parameters for lmrob() and Auxiliaries}
\encoding{utf8}
\alias{lmrob.control}
\alias{update.lmrobCtrl}
\alias{.Mchi.tuning.default}
\alias{.Mpsi.tuning.default}
\alias{.Mchi.tuning.defaults}
\alias{.Mpsi.tuning.defaults}
\description{
  Tuning parameters for \code{\link{lmrob}}, the MM-type regression
  estimator and the associated S-, M- and D-estimators.  Using
  \code{setting="KS2011"} sets the defaults as suggested by
  Koller and Stahel (2011) and analogously for \code{"KS2014"}.

  The \code{.M*.default} \code{\link{function}}s and
  \code{.M*.defaults} \code{\link{list}}s contain default tuning
  parameters for all the predefined \eqn{\psi}{psi} functions, see also
  \code{\link{Mpsi}}, etc.
}
\usage{
lmrob.control(setting, seed = NULL, nResample = 500,
              tuning.chi = NULL, bb = 0.5, tuning.psi = NULL,
              max.it = 50, groups = 5, n.group = 400,
              k.fast.s = 1, best.r.s = 2,
              k.max = 200, maxit.scale = 200, k.m_s = 20,
              refine.tol = 1e-7, rel.tol = 1e-7, scale.tol = 1e-10, solve.tol = 1e-7,
              zero.tol = 1e-10,
              trace.lev = 0,
              mts = 1000, subsampling = c("nonsingular", "simple"),
              compute.rd = FALSE, method = "MM", psi = "bisquare",
              numpoints = 10, cov = NULL,
              split.type = c("f", "fi", "fii"), fast.s.large.n = 2000,
              # only for outlierStats() :
              eps.outlier = function(nobs) 0.1 / nobs,
              eps.x = function(maxx) .Machine$double.eps^(.75)*maxx,
              compute.outlier.stats = method,
              warn.limit.reject = 0.5,
              warn.limit.meanrw = 0.5, \dots)

\method{update}{lmrobCtrl}(object, \dots)

.Mchi.tuning.defaults
.Mchi.tuning.default(psi)
.Mpsi.tuning.defaults
.Mpsi.tuning.default(psi)
}
\arguments{
  \item{setting}{a string specifying alternative default values.  Leave
    empty for the defaults or use \code{"KS2011"} or \code{"KS2014"}
    for the defaults suggested by Koller and Stahel (2011, 2017).
    See \emph{Details}.}
  \item{seed}{\code{NULL} or an integer vector compatible with
    \code{\link{.Random.seed}}: the seed to be used for random
    re-sampling used in obtaining candidates for the initial
    S-estimator.  The current value of \code{.Random.seed} will be
    preserved if \code{seed} is set, i.e. non-\code{NULL};
    otherwise, as by default, \code{.Random.seed} will be used and
    modified as usual from calls to \code{\link{runif}()} etc.
  }
  \item{nResample}{number of re-sampling candidates to be
    used to find the initial S-estimator.  Currently defaults to 500
    which works well in most situations (see references).}
  \item{tuning.chi}{tuning constant vector for the S-estimator.  If
    \code{NULL}, as by default, sensible defaults are set (depending on
    \code{psi}) to yield a 50\% breakdown estimator.  See \emph{Details}.}
  \item{bb}{expected value under the normal model of the
    \dQuote{chi} (rather \eqn{\rho (rho)}{rho}) function with tuning
    constant equal to \code{tuning.chi}.  This is used to compute the
    S-estimator.}
  \item{tuning.psi}{tuning constant vector for the redescending
    M-estimator.  If \code{NULL}, as by default, this is set (depending
    on \code{psi}) to yield an estimator with asymptotic efficiency of
    95\% for normal errors.  See \emph{Details}.}
  \item{max.it}{integer specifying the maximum number of IRWLS iterations.}
  \item{groups}{(for the fast-S algorithm): Number of
    random subsets to use when the data set is large.}
  \item{n.group}{(for the fast-S algorithm): Size of each of the
    \code{groups} above.  Note that this must be at least \eqn{p}.}
  \item{k.fast.s}{(for the fast-S algorithm): Number of
    local improvement steps (\dQuote{\emph{I-steps}}) for each
    re-sampling candidate.}
  \item{k.m_s}{(for the M-S algorithm): specifies after how many
    unsuccessful refinement steps the algorithm stops.}
  \item{best.r.s}{(for the fast-S algorithm): Number of
    best candidates to be iterated further (i.e.,
    \dQuote{\emph{\bold{r}efined}}); is denoted \eqn{t} in
    Salibian-Barrera & Yohai (2006).}
  \item{k.max}{(for the fast-S algorithm): maximal number of
    refinement steps for the \dQuote{fully} iterated best candidates.}
  \item{maxit.scale}{integer specifying the maximum number of C level
    \code{find_scale()} iterations (in fast-S and M-S algorithms).}
  \item{refine.tol}{(for the fast-S algorithm): relative convergence
    tolerance for the fully iterated best candidates.}
  \item{rel.tol}{(for the RWLS iterations of the MM algorithm): relative
    convergence tolerance for the parameter vector.}
  \item{scale.tol}{(for the scale estimation iterations of the S algorithm): relative
    convergence tolerance for the \code{scale} \eqn{\sigma(.)}.}
  \item{solve.tol}{(for the S algorithm): relative
    tolerance for inversion.  Hence, this corresponds to
    \code{\link{solve.default}()}'s \code{tol}.}
  \item{zero.tol}{for checking 0-residuals in the S algorithm, non-negative number
    \eqn{\epsilon_z}{ez} such that
    \eqn{\{i; \left|\tilde{R}_i\right| \le \epsilon_z\}}{{i; |R~[i]| <= ez}}
    correspond to \eqn{0}-residuals, where \eqn{\tilde{R}_i}{R~[i]} are standardized residuals,
    \eqn{\tilde{R}_i = R_i/s_y}{R~[i] = R[i]/sy} and
    \eqn{s_y = \frac{1}{n} \sum_{i=1}^n \left|y_i\right|}{%
          sy = ave_i |y[i]| = 1/n sum(i=1..n, |y[i]|)}.}
  \item{trace.lev}{integer indicating if the progress of the MM-algorithm
    and the fast-S algorithms, see \code{\link{lmrob.S}},
    should be traced (increasingly); default \code{trace.lev = 0} does
    no tracing.}
  \item{mts}{maximum number of samples to try in subsampling
    algorithm.}
  \item{subsampling}{type of subsampling to be used, a string:
    \code{"simple"} for simple subsampling (default prior to version 0.9),
    \code{"nonsingular"} for nonsingular subsampling.  See also
    \code{\link{lmrob.S}}.}
  \item{compute.rd}{logical indicating if robust distances (based on
    the MCD robust covariance estimator \code{\link{covMcd}}) are to be
    computed for the robust diagnostic plots.  This may take some
    time to finish, particularly for large data sets, and can lead to
    singularity problems when there are \code{\link{factor}} explanatory
    variables (with many levels, or levels with \dQuote{few}
    observations).  Hence, it is \code{FALSE} by default.}
  \item{method}{string specifying the estimator-chain. \code{MM}
    is interpreted as \code{SM}.  See \emph{Details} of
    \code{\link{lmrob}} for a description of the possible values.}
  \item{psi}{string specifying the type of \eqn{\psi}{psi}-function
    used.  See \emph{Details} of \code{\link{lmrob}}.  Defaults to
    \code{"bisquare"} for S and MM-estimates, otherwise \code{"lqq"}.}
  \item{numpoints}{number of points used in Gauss quadrature.}
  \item{cov}{function or string with function name to be used to
    calculate covariance matrix estimate.  The default is
    \code{if(method \%in\% c('SM', 'MM')) ".vcov.avar1" else ".vcov.w"}.
    See \emph{Details} of \code{\link{lmrob}}.}
  \item{split.type}{determines how categorical and continuous variables
    are split.  See \code{\link{splitFrame}}.}
  \item{fast.s.large.n}{minimum number of observations required to
    switch from ordinary \dQuote{fast S} algorithm to an efficient
    \dQuote{large n} strategy.}
  \item{eps.outlier}{limit on the robustness weight below which an observation
    is considered to be an outlier.
    Either a \code{numeric(1)} or a function that takes the number of observations as
    an argument.  Used only in \code{\link{summary.lmrob}} and
    \code{\link{outlierStats}}.}
  \item{eps.x}{limit on the absolute value of the elements of the design matrix
    below which an element is considered zero.
    Either a \code{numeric(1)} or a function that takes the maximum absolute value in
    the design matrix as an argument.}
  \item{compute.outlier.stats}{vector of \code{\link{character}}
    strings, each valid to be used as \code{method} argument.  Used to
    specify for which estimators outlier statistics (and warnings)
    should be produced.  Set to empty (\code{NULL} or \code{character(0)})
    if none are required.
    \cr Note that the default is \code{method} which by default is either
    \code{"MM"}, \code{"SM"}, or \code{"SMDM"}; hence using
    \code{compute.outlier.stats = "S"} provides \code{\link{outlierStats}()}
    to a \code{\link{lmrob.S}()} result.}
  \item{warn.limit.reject}{limit of ratio
    \eqn{\#\mbox{rejected} / \#\mbox{obs in level}}{# rejected / # obs in level}
    above (\eqn{\geq}{>=}) which a warning is produced.
    Set to \code{NULL} to disable warning.}
  \item{warn.limit.meanrw}{limit of the mean robustness weight per factor
    level below which (\eqn{\leq}{<=}) a warning is produced.
    Set to \code{NULL} to disable warning.}

  \item{object}{an \code{"lmrobCtrl"} object, as resulting from a
    \code{lmrob.control(*)} or an \code{update(<lmrobCtrl>, *)} call.}
  \item{\dots}{for \describe{
      \item{\code{lmrob.control()}:}{further arguments to be added as
	\code{\link{list}} components to the result, e.g., those to be used in
	\code{.vcov.w()}.}
      \item{\code{update(object, *)}:}{(named) components from
	\code{object}, to be \emph{modified}, \bold{not} \code{setting = *}.}
  }}
}
\value{
  \code{.Mchi.tuning.default(psi)} and \code{.Mpsi.tuning.default(psi)}
  return a short \code{\link{numeric}} vector of tuning constants which
  are defaults for the corresponding psi-function, see the \emph{Details}.
  They are based on the named \code{\link{list}}s
  \code{.Mchi.tuning.defaults} and \code{.Mpsi.tuning.defaults},
  respectively.

  \code{lmrob.control()} returns a named \code{\link{list}} with over
  twenty components, corresponding to the arguments, where
  \code{tuning.psi} and \code{tuning.chi} are typically computed, as
  \code{.Mpsi.tuning.default(psi)} or \code{.Mchi.tuning.default(psi)},
  respectively.
  It is of \code{\link{class}} \code{"lmrobCtrl"} and we provide
  \code{print()}, \code{\link{update}()} and \code{\link{within}} methods.

  \code{update(<lmrobCtrl>, ....)} does \emph{not} allow a
  \code{setting="<...>"} in \code{....}.
}
\details{The option \code{setting="KS2011"} alters the default
  arguments.  They are changed to \code{method = "SMDM"}, \code{psi = "lqq"},
  \code{max.it = 500}, \code{k.max = 2000}, \code{cov = ".vcov.w"}.
  The defaults of all the remaining arguments are not changed.

  The option \code{setting="KS2014"} builds upon \code{setting="KS2011"}.
  More arguments are changed to \code{best.r.s = 20, k.fast.s = 2,
  nResample = 1000}.  This setting should produce more stable estimates
  for designs with \code{\link{factor}}s.

  By default, and in \code{.Mpsi.tuning.default()} and \code{.Mchi.tuning.default()},
  \code{tuning.chi} and \code{tuning.psi} are set to yield an
  MM-estimate with breakdown point \eqn{0.5} and efficiency of 95\% at
  the normal.

  If numeric \code{tuning.chi} or \code{tuning.psi} are specified, say
  \code{cc}, for \code{psi = "ggw"} or \code{"lqq"},
  \code{\link{.psi.const}(cc, psi)} is used, see its help page.

  To get the defaults, e.g., \code{.Mpsi.tuning.default(psi)} is
  equivalent to but more efficient than the formerly widely used
  \code{lmrob.control(psi = psi)$tuning.psi}.

  These defaults are:
  \tabular{rll}{
    \code{psi}     \tab\code{tuning.chi}               \tab\code{tuning.psi} \cr
    \code{bisquare}\tab\code{1.54764}                  \tab\code{4.685061} \cr
    \code{welsh}   \tab\code{0.5773502}                \tab\code{2.11} \cr
    \code{ggw} 	   \tab\code{c(-0.5, 1.5, NA, 0.5)}    \tab\code{c(-0.5, 1.5, 0.95, NA)} \cr
    \code{lqq}     \tab\code{c(-0.5, 1.5, NA, 0.5)}    \tab\code{c(-0.5, 1.5, 0.95, NA)} \cr
    \code{optimal} \tab\code{0.4047}                   \tab\code{1.060158} \cr
    \code{hampel}  \tab\code{c(1.5, 3.5, 8)*0.2119163} \tab\code{c(1.5, 3.5, 8)*0.9014}
  }
  The values for the tuning constant for the \code{ggw} and \code{lqq}
  psi functions are specified differently here by a vector with four
  elements: minimal slope, b (controlling the bend at the maximum of the curve),
  efficiency, breakdown point.
  Use \code{NA} for an unspecified value of either efficiency or
  breakdown point, see examples in the tables (above and below).
  For these table examples, the respective \dQuote{inner constants} are
  stored precomputed, see \code{\link{.psi.lqq.findc}} for more.

  The constants for the \code{"hampel"} psi function are chosen to have a
  redescending slope of \eqn{-1/3}.  Constants for a slope of \eqn{-1/2}
  would be
  \tabular{rll}{
    \code{psi}     \tab\code{tuning.chi}             \tab\code{tuning.psi} \cr
    \code{"hampel"}\tab\code{c(2, 4, 8) * 0.1981319} \tab\code{c(2, 4, 8) * 0.690794}
  }

  Alternative coefficients for an efficiency of 85\%
  at the normal are given in the table below.
  \tabular{rl}{
    \code{psi}		  \tab\code{tuning.psi} \cr
    \code{bisquare}	  \tab\code{3.443689} \cr
    \code{welsh}	  \tab\code{1.456} \cr
    \code{ggw}, \code{lqq}\tab\code{c(-0.5, 1.5, 0.85, NA)} \cr
    \code{optimal}        \tab\code{0.8684} \cr
    \code{hampel} (-1/3)  \tab\code{c(1.5, 3.5, 8)* 0.5704545} \cr
    \code{hampel} (-1/2)  \tab\code{c( 2,  4,  8) * 0.4769578}
  }
}
\references{
  Koller, M. and Stahel, W.A. (2011)
  Sharpening Wald-type inference in robust regression for small samples.
  \emph{Computational Statistics & Data Analysis} \bold{55}(8), 2504--2515.

  Koller, M. and Stahel, W.A. (2017)
  Nonsingular subsampling for regression S estimators with categorical predictors,
  \emph{Computational Statistics} \bold{32}(2): 631--646.
  \doi{10.1007/s00180-016-0679-x}.
  Referred to as \code{"KS2014"} everywhere in \pkg{robustbase};  a shorter first
  version, Koller (2012), has been available from \url{https://arxiv.org/abs/1208.5595}.
}
\author{Matias Salibian-Barrera, Martin Maechler and Manuel Koller}
\seealso{  \code{\link{Mpsi}}, etc., for the (fast!) psi function computations;
  \code{\link{lmrob}}, also for references and examples.
}
\examples{
## Show the default settings:
str(lmrob.control())

## Artificial data for a  simple  "robust t test":
## y0 is a standard normal sample of size 200;  y is a copy of y0 in which
## 20 randomly chosen observations are replaced by 100 * rnorm() values.
set.seed(17)
y <- y0 <- rnorm(200)
y[sample(200,20)] <- 100*rnorm(20)
gr <- as.factor(rbinom(200, 1, prob = 1/8))
## robust fit without intercept, i.e., one coefficient per level of 'gr';
## note that the call uses y0 (y is not used further in these examples):
lmrob(y0 ~ 0+gr)

## Use  Koller & Stahel (2011)'s recommendation but a larger  'max.it':
str(ctrl <- lmrob.control("KS2011", max.it = 1000))

str(.Mpsi.tuning.defaults)
## the list must be identical to what .Mpsi.tuning.default() returns when
## applied to each of the list's names:
stopifnot(identical(.Mpsi.tuning.defaults,
                   sapply(names(.Mpsi.tuning.defaults),
                          .Mpsi.tuning.default)))
## Containing (names!) all our (pre-defined) redescenders:
str(.Mchi.tuning.defaults)

## Difference between settings:
Cdef <- lmrob.control()
C11 <- lmrob.control("KS2011")
C14 <- lmrob.control("KS2014")
str(C14)
## Differences: names of those components which are not identical()
## (ignoring environments) between the two control lists being compared
diffD <- names(which(!mapply(identical, Cdef,C11, ignore.environment=TRUE)))
diffC <- names(which(!mapply(identical, C11, C14, ignore.environment=TRUE)))
## KS2011 vs KS2014:  Apart from `setting` itself, they only differ in three places:
## (diffC[-1] drops the first differing component, i.e., `setting`)
cbind(KS11 = unlist(C11[diffC[-1]]),
      KS14 = unlist(C14[diffC[-1]]))
##           KS11 KS14
## nResample  500 1000
## best.r.s     2   20
## k.fast.s     1    2
## default vs KS2011: a bit more: setting + 8
str2simpLang <- function(x) {
    ## Compact display form of one control component:
    ## NULL is shown as "(NULL)";  anything else is deparsed and parsed again,
    ## and a resulting call is converted to its character representation,
    ## whereas a plain constant (number, string, ..) is returned as is.
    expr <- if(!is.null(x)) str2lang(deparse(x)) else quote((NULL))
    if(!is.call(expr))
        return(expr)
    format(expr)
}
## show the differing components of the default and the "KS2011" control
## side by side, each one converted by str2simpLang():
cbind(deflt= lapply(Cdef[diffD], str2simpLang),
      KS11 = lapply(C11 [diffD], str2simpLang))

## update()ing a lmrob.control() , e.g.,
C14mod <- update(C14, trace.lev = 2) # the same as
C14m.d <- C14; C14m.d$trace.lev <- 2
stopifnot(identical(C14mod, C14m.d))
## changing psi --> updates tuning.{psi,chi}:
C14mp <- update(C14, psi = "hampel", seed=101)
## updating 'method' is "smart" :
C.SMDM <- update(Cdef, method="SMDM")
all.equal(Cdef, C.SMDM) # changed also psi, tuning.{psi,chi} and cov !
chgd <- c("method", "psi", "tuning.chi",  "tuning.psi", "cov")
str(Cdef  [chgd])
str(C.SMDM[chgd])
C14m <- update(C14, method="SMM")
(ae <- all.equal(C14, C14mp))# changed tuning.psi & tuning.chi, too
stopifnot(exprs = {
    identical(C14, update(C14, method="SMDM")) # no change!
    ## reduce each all.equal() message in 'ae' to a bare component name: drop
    ## the leading "Component ", everything from the first ":" on, and all
    ## characters that are neither letters nor dots; then compare the sorted names
    identical(c("psi", "seed", "tuning.chi", "tuning.psi"),
              sort(gsub("[^.[:alpha:]]", "", sub(":.*", "", sub("^Component ", "", ae)))))
    ## here, update(*, method="SMM") changed nothing but the 'method' component:
    identical(C14m, local({C <- C14; C$method <- "SMM"; C}))
})
## 'setting' cannot be changed via update():
try( update(C14, setting="KS2011") ) #--> Error: .. not allowed
\dontshow{tools::assertError(update(C14, setting="KS2011"))}
}
\keyword{robust}
\keyword{regression}