% lmrob.control.Rd --- documentation for lmrob.control() in the robustbase package
\name{lmrob.control}
\title{Tuning Parameters for lmrob() and Auxiliaries}
\encoding{utf8}
\alias{lmrob.control}
\alias{.Mchi.tuning.default}
\alias{.Mpsi.tuning.default}
\alias{.Mchi.tuning.defaults}
\alias{.Mpsi.tuning.defaults}
\description{
  Tuning parameters for \code{\link{lmrob}}, the MM-type regression
  estimator and the associated S-, M- and D-estimators.  Using
  \code{setting="KS2011"} sets the defaults as suggested by
  Koller and Stahel (2011).
}
\usage{
lmrob.control(setting, seed = NULL, nResample = 500,
              tuning.chi = NULL, bb = 0.5, tuning.psi = NULL,
              max.it = 50, groups = 5, n.group = 400,
              k.fast.s = 1, best.r.s = 2,
              k.max = 200, maxit.scale = 200, k.m_s = 20,
              refine.tol = 1e-7, rel.tol = 1e-7, solve.tol = 1e-7,
              trace.lev = 0,
              mts = 1000, subsampling = c("nonsingular", "simple"),
              compute.rd = FALSE, method = "MM", psi = "bisquare",
              numpoints = 10, cov = NULL,
              split.type = c("f", "fi", "fii"), fast.s.large.n = 2000, ...)

.Mchi.tuning.defaults
.Mchi.tuning.default(psi)
.Mpsi.tuning.defaults
.Mpsi.tuning.default(psi)
}
\arguments{
  \item{setting}{a string specifying alternative default values.  Leave
    empty for the defaults or use \code{"KS2011"} for the defaults
    suggested by Koller and Stahel (2011).  See \emph{Details}.}
  \item{seed}{\code{NULL} or an integer vector compatible with
    \code{\link{.Random.seed}}: the seed to be used for random
    re-sampling used in obtaining candidates for the initial
    S-estimator.  The current value of \code{.Random.seed} will be
    preserved if \code{seed} is set, i.e. non-\code{NULL};
    otherwise, as by default, \code{.Random.seed} will be used and
    modified as usual from calls to \code{\link{runif}()} etc.
  }
  \item{nResample}{number of re-sampling candidates to be
    used to find the initial S-estimator.  Currently defaults to 500
    which works well in most situations (see references).}
  \item{tuning.chi}{tuning constant vector for the S-estimator.  If
    \code{NULL}, as by default, sensible defaults are set (depending on
    \code{psi}) to yield a 50\% breakdown estimator.  See \emph{Details}.}
  \item{bb}{expected value under the normal model of the
    \dQuote{chi} (rather \eqn{\rho}{rho}) function with tuning
    constant equal to \code{tuning.chi}.  This is used to compute the
    S-estimator.}
  \item{tuning.psi}{tuning constant vector for the redescending
    M-estimator.  If \code{NULL}, as by default, this is set (depending
    on \code{psi}) to yield an estimator with asymptotic efficiency of
    95\% for normal errors.  See \emph{Details}.}
  \item{max.it}{integer specifying the maximum number of IRWLS iterations.}
  \item{groups}{(for the fast-S algorithm): Number of
    random subsets to use when the data set is large.}
  \item{n.group}{(for the fast-S algorithm): Size of each of the
    \code{groups} above.  Note that this must be at least \eqn{p}.}
  \item{k.fast.s}{(for the fast-S algorithm): Number of
    local improvement steps (\dQuote{\emph{I-steps}}) for each
    re-sampling candidate.}
  \item{k.m_s}{(for the M-S algorithm): specifies after how many
    unsuccessful refinement steps the algorithm stops.}
  \item{best.r.s}{(for the fast-S algorithm): Number
    of best candidates to be iterated further (i.e.,
    \dQuote{\emph{\bold{r}efined}}); is denoted \eqn{t} in
    Salibian-Barrera & Yohai (2006).}
  \item{k.max}{(for the fast-S algorithm): maximal number of
    refinement steps for the \dQuote{fully} iterated best candidates.}
  \item{maxit.scale}{integer specifying the maximum number of C level
    \code{find_scale()} iterations.}
  \item{refine.tol}{(for the fast-S algorithm): relative convergence
    tolerance for the fully iterated best candidates.}
  \item{rel.tol}{(for the RWLS iterations of the MM algorithm): relative
    convergence tolerance for the parameter vector.}
  \item{solve.tol}{(for the S algorithm): relative
    tolerance for inversion.  Hence, this corresponds to
    \code{\link{solve.default}()}'s \code{tol}.}
  \item{trace.lev}{integer indicating if the progress of the MM-algorithm
    should be traced (increasingly); default \code{trace.lev = 0} does
    no tracing.}
  \item{mts}{maximum number of samples to try in subsampling
    algorithm.}
  \item{subsampling}{type of subsampling to be used, a string:
    \code{"simple"} for simple subsampling (default prior to version 0.9),
    \code{"nonsingular"} for nonsingular subsampling.  See also
    \code{\link{lmrob.S}}.}
  \item{compute.rd}{logical indicating if robust distances (based on
    the MCD robust covariance estimator \code{\link{covMcd}}) are to be
    computed for the robust diagnostic plots.  This may take some
    time to finish, particularly for large data sets, and can lead to
    singularity problems when there are \code{\link{factor}} explanatory
    variables (with many levels, or levels with \dQuote{few}
    observations).  Hence, it is \code{FALSE} by default.}
  \item{method}{string specifying the estimator-chain. \code{MM}
    is interpreted as \code{SM}. See \emph{Details} of
    \code{\link{lmrob}} for a description of the possible values.}
  \item{psi}{string specifying the type of \eqn{\psi}{psi}-function
    used.  See \emph{Details} of \code{\link{lmrob}}.  Defaults to
    \code{"bisquare"} for S and MM-estimates, otherwise \code{"lqq"}.}
  \item{numpoints}{number of points used in Gauss quadrature.}
  \item{cov}{function or string with function name to be used to
    calculate covariance matrix estimate.  The default is
    \code{if(method \%in\% c('SM', 'MM')) ".vcov.avar1" else ".vcov.w"}.
    See \emph{Details} of \code{\link{lmrob}}.}
  \item{split.type}{determines how categorical and continuous variables
    are split. See \code{\link{splitFrame}}.}
  \item{fast.s.large.n}{minimum number of observations required to
    switch from ordinary \dQuote{fast S} algorithm to an efficient
    \dQuote{large n} strategy.}
  \item{...}{further arguments to be added as \code{\link{list}}
    components to the result, e.g., those to be used in \code{.vcov.w()}.}
}
\value{
  \code{.Mchi.tuning.default(psi)} and \code{.Mpsi.tuning.default(psi)}
  return a short \code{\link{numeric}} vector of tuning constants which
  are defaults for the corresponding psi-function, see the \emph{Details}.
  They are based on the named \code{\link{list}}s
  \code{.Mchi.tuning.defaults} and \code{.Mpsi.tuning.defaults},
  respectively.

  \code{lmrob.control()} returns a named \code{\link{list}} with over
  twenty components, corresponding to the arguments, where
  \code{tuning.psi} and \code{tuning.chi} are typically computed, as
  \code{.Mpsi.tuning.default(psi)} or \code{.Mchi.tuning.default(psi)},
  respectively.
}
\details{The option \code{setting="KS2011"} alters the default
  arguments.  They are changed to \code{method = 'SMDM', psi = 'lqq',
    max.it = 500, k.max = 2000, cov = '.vcov.w'}.  The defaults of all
  the remaining arguments are not changed.

  By default, and in \code{.Mpsi.tuning.default()} and \code{.Mchi.tuning.default()},
  \code{tuning.chi} and \code{tuning.psi} are set to
  yield an MM-estimate with break-down point \eqn{0.5} and efficiency of
  95\% at the normal.

  To get these defaults, e.g., \code{.Mpsi.tuning.default(psi)} is
  equivalent to but more efficient than the formerly widely used
  \code{lmrob.control(psi = psi)$tuning.psi}.

  These defaults are:
  \tabular{rll}{
    \code{psi} \tab \code{tuning.chi} \tab \code{tuning.psi} \cr
    \code{bisquare} \tab \code{1.54764} \tab \code{4.685061} \cr
    \code{welsh} \tab \code{0.5773502} \tab \code{2.11} \cr
    \code{ggw} \tab \code{c(-0.5, 1.5, NA, 0.5)} \tab
    \code{c(-0.5, 1.5, 0.95, NA)} \cr
    \code{lqq} \tab \code{c(-0.5, 1.5, NA, 0.5)} \tab
    \code{c(-0.5, 1.5, 0.95, NA)} \cr
    \code{optimal} \tab \code{0.4047} \tab \code{1.060158} \cr
    \code{hampel} \tab \code{c(1.5, 3.5, 8)*0.2119163} \tab
    \code{c(1.5, 3.5, 8)*0.9014}
  }
  The values for the tuning constant for the \code{ggw} psi function are
  hard coded.  The constants vector has four elements: minimal slope, b
  (controlling the bend at the maximum of the curve), efficiency,
  break-down point.  Use \code{NA} for an unspecified value, see examples
  in the tables.

  The constants for the \code{"hampel"} psi function are chosen to have a
  redescending slope of \eqn{-1/3}.  Constants for a slope of \eqn{-1/2}
  would be
  \tabular{rll}{
    \code{psi} \tab \code{tuning.chi} \tab \code{tuning.psi} \cr
    \code{"hampel"} \tab \code{c(2, 4, 8) * 0.1981319} \tab
    \code{c(2, 4, 8) * 0.690794}
  }

  Alternative coefficients for an efficiency of 85\%
  at the normal are given in the table below.
  \tabular{rl}{
    \code{psi} \tab \code{tuning.psi} \cr
    \code{bisquare} \tab \code{3.443689} \cr
    \code{welsh} \tab \code{1.456} \cr
    \code{ggw}, \code{lqq} \tab \code{c(-0.5, 1.5, 0.85, NA)} \cr
    \code{optimal} \tab \code{0.8684} \cr
    \code{hampel} (-1/3) \tab \code{c(1.5, 3.5, 8)* 0.5704545} \cr
    \code{hampel} (-1/2) \tab \code{c( 2,  4,  8) * 0.4769578}
  }
}
\references{
  Koller, M. and Stahel, W.A. (2011)
  Sharpening Wald-type inference in robust regression for small samples.
  \emph{Computational Statistics & Data Analysis} \bold{55}(8), 2504--2515.
}
\author{ Matias Salibian-Barrera, Martin Maechler and Manuel Koller}
\seealso{ \code{\link{lmrob}}, also for references and examples.
}
\examples{
## Show the default settings:
str(lmrob.control())

## Artificial data for a  simple  "robust t test":
set.seed(17)
y <- y0 <- rnorm(200)
y[sample(200,20)] <- 100*rnorm(20)
gr <- as.factor(rbinom(200, 1, prob = 1/8))
lmrob(y0 ~ 0+gr)

## Use  Koller & Stahel(2011)'s recommendation but a larger  'max.it':
str(ctrl <- lmrob.control("KS2011", max.it = 1000))

str(.Mpsi.tuning.defaults)
stopifnot(identical(.Mpsi.tuning.defaults,
                   sapply(names(.Mpsi.tuning.defaults),
                          .Mpsi.tuning.default)))
}
\keyword{robust}
\keyword{regression}