File: lmrob.Rd

package info (click to toggle)
robustbase 0.99-4-1-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 4,552 kB
  • sloc: fortran: 3,245; ansic: 3,243; sh: 15; makefile: 2
file content (289 lines) | stat: -rw-r--r-- 12,876 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
\title{MM-type Estimators for Linear Regression}
\name{lmrob}
\encoding{utf8}
\alias{lmrob}
% "Link to here", even those are not exported:
\alias{.vcov.avar1}
\alias{.vcov.w}
\description{
  Computes fast MM-type estimators for linear (regression) models.
}
\usage{
lmrob(formula, data, subset, weights, na.action, method = "MM",
      model = TRUE, x = !control$compute.rd, y = FALSE,
      singular.ok = TRUE, contrasts = NULL, offset = NULL,
      control = NULL, init = NULL, ...)
}
\arguments{
  \item{formula}{a symbolic description of the model to be fit.  See
    \code{\link{lm}} and \code{\link{formula}} for more details.}

  \item{data}{an optional data frame, list or environment (or object
    coercible by \code{\link{as.data.frame}} to a data frame) containing
    the variables in the model.  If not found in \code{data}, the
    variables are taken from \code{environment(formula)},
    typically the environment from which \code{lmrob} is called.}

  \item{subset}{an optional vector specifying a subset of observations
    to be used in the fitting process.}

  \item{weights}{an optional vector of weights to be used in the fitting
    process (in addition to the robustness weights computed in the
    fitting process).}
  \item{na.action}{a function which indicates what should happen
    when the data contain \code{NA}s.  The default is set by
    the \code{na.action} setting of \code{\link{options}}, and is
    \code{\link{na.fail}} if that is unset.  The \dQuote{factory-fresh}
    default is \code{\link{na.omit}}.  Another possible value is
    \code{NULL}, no action.  Value \code{\link{na.exclude}} can be useful.}

  \item{method}{string specifying the estimator-chain. \code{MM}
    is interpreted as \code{SM}.  See \emph{Details}, notably the
    currently recommended \code{setting = "KS2014"}.}

  \item{model, x, y}{logicals.  If \code{TRUE} the corresponding
    components of the fit (the model frame, the model matrix, the
    response) are returned.}

  \item{singular.ok}{logical.  If \code{FALSE} (the default in S but
    not in \R) a singular fit is an error.}

  \item{contrasts}{an optional list.  See the \code{contrasts.arg}
    of \code{\link{model.matrix.default}}.}

  \item{offset}{this can be used to specify an \emph{a priori}
    known component to be included in the linear predictor
    during fitting.  An \code{\link{offset}} term can be included in the
    formula instead or as well, and if both are specified their sum is used.}

  \item{control}{a \code{\link{list}} specifying control parameters; use
    the function \code{\link{lmrob.control}(.)} and see its help page.}

  \item{init}{an optional argument to specify or supply the initial
    estimate. See \emph{Details}.}

  \item{\dots}{additional arguments can be used to specify control
    parameters directly instead of (but not in addition to!) via \code{control}.}
}
\details{
  \describe{
    \item{Overview:}{
      This function computes an MM-type regression estimator as
      described in Yohai (1987) and Koller and Stahel (2011).  By
      default it uses a bi-square redescending score function, and it
      returns a highly robust and highly efficient estimator (with 50\%
      breakdown point and 95\% asymptotic efficiency for normal errors).
      The computation is carried out by a call to \code{\link{lmrob.fit}()}.

      The argument \code{setting} of \code{\link{lmrob.control}} is provided
      to set alternative defaults as suggested in Koller and Stahel (2011)
      (\code{setting="KS2011"}; now do use its extension
      \code{setting="KS2014"}).  For further details, see \code{\link{lmrob.control}}.
    }
    \item{Initial Estimator \code{init}:}{
      The initial estimator may be specified using the argument
      \code{init}.  This can either be
      \itemize{
	\item a \emph{string} used to specify built in internal
	estimators (currently \code{"S"} and \code{"M-S"}, see \emph{See also}
	below);
	\item a \code{\link{function}} taking arguments \code{x, y,
	  control, mf} (where \code{mf} stands for \code{model.frame}) and
	returning a \code{\link{list}} containing at least the initial coefficients as
	component \code{"coefficients"} and the initial scale estimate as
	\code{"scale"}.
	\item Or a \code{\link{list}} giving the initial coefficients and
	scale as components \code{"coefficients"} and \code{"scale"}.  See also
	\emph{Examples}.
      }
      Note that when \code{init} is a function or list, the
      \code{method} argument must \emph{not} contain the initial estimator, e.g.,
      use \code{MDM} instead of \code{SMDM}.

      The default, equivalent to \code{init = "S"}, uses as initial
      estimator an S-estimator (Rousseeuw and Yohai, 1984) which is
      computed using the Fast-S algorithm of Salibian-Barrera and Yohai
      (2006), calling \code{\link{lmrob.S}()}.  That function, since
      March 2012, by default uses \emph{nonsingular} subsampling which
      makes the Fast-S algorithm feasible for categorical data as well,
      see Koller (2012).  Note that convergence problems may still show
      up as warnings, e.g., \preformatted{
  S refinements did not converge (to refine.tol=1e-07) in 200 (= k.max) steps
}
      and often can simply be remedied by increasing (i.e. weakening)
      \code{refine.tol} or increasing the allowed number of iterations
      \code{k.max}, see \code{\link{lmrob.control}}.
    }
    \item{Method \code{method}:}{
      The following chain of estimates is customizable via the
      \code{method} argument. % of \code{\link{lmrob.control}}.
      There are currently two types of estimates available,
      \describe{
	\item{\code{"M"}:}{corresponds to the standard M-regression
	  estimate.}
	\item{\code{"D"}:}{stands for the Design Adaptive Scale estimate
	  as proposed in Koller and Stahel (2011).}
      }
      The \code{method} argument takes a string that specifies the
      estimates to be calculated as a chain.  Setting
      \code{method='SMDM'} will result in an intial S-estimate, followed
      by an M-estimate, a Design Adaptive Scale estimate and a final
      M-step.  For methods involving a \code{D}-step, the default value
      of \code{psi} (see \code{\link{lmrob.control}}) is changed to
      \code{"lqq"}.

      By default, standard errors are computed using the formulas of
      Croux, Dhaene and Hoorelbeke (2003) (\code{\link{lmrob.control}}
      option \code{cov=".vcov.avar1"}).  This method, however, works only
      for MM-estimates (i.e., \code{method = "MM"} or \code{ = "SM"}).  For other
      \code{method} arguments, the covariance matrix estimate used is based on the asymptotic
      normality of the estimated coefficients (\code{cov=".vcov.w"}) as
      described in Koller and Stahel (2011).
      The var-cov computation can be skipped by \code{cov = "none"} and
      (re)done later by e.g., \code{vcov(<obj>, cov = ".vcov.w")}.

      As of robustbase version 0.91-0 (April 2014), the computation of
      robust standard errors for \code{method="SMDM"} has been changed.
      The old behaviour can be restored by setting the control parameter
      \code{cov.corrfact = "tauold"}.%% FIXME: regr.test for that
    }
  }%end {describe}
}
\value{
  An object of class \code{lmrob}; a list including the following
  components:
  \item{coefficients}{The estimate of the coefficient vector}
  \item{scale}{The scale as used in the M estimator.}
  \item{residuals}{Residuals associated with the estimator.}
  %loss
  \item{converged}{\code{TRUE} if the IRWLS iterations have converged.}
  \item{iter}{number of IRWLS iterations}
  \item{rweights}{the \dQuote{robustness weights} \eqn{\psi(r_i/S) / (r_i/S)}.}
  \item{fitted.values}{Fitted values associated with the estimator.}
  %control
  \item{init.S}{The \code{\link{list}} returned by \code{\link{lmrob.S}()} or
    \code{\link{lmrob.M.S}()} (for MM-estimates, i.e., \code{method="MM"} or \code{"SM"} only)}
  \item{init}{A similar list that contains the results of intermediate
    estimates (\emph{not} for MM-estimates).}
  %qr
  \item{rank}{the numeric rank of the fitted linear model.}
  \item{cov}{The estimated covariance matrix of the regression
    coefficients}
  \item{df.residual}{the residual degrees of freedom.}
  %degree.freedom
  \item{weights}{the specified weights (missing if none were used).}
  \item{na.action}{(where relevant) information returned by
      \code{\link{model.frame}} on the special handling of \code{NA}s.}
  \item{offset}{the offset used (missing if none were used).}
  \item{contrasts}{(only where relevant) the contrasts used.}
  \item{xlevels}{(only where relevant) a record of the levels of the factors
    used in fitting.}
  \item{call}{the matched call.}
  \item{terms}{the \code{terms} object used.}
  %assign
  \item{model}{if requested (the default), the model frame used.}
  \item{x}{if requested, the model matrix used.}
  \item{y}{if requested, the response used.}
  In addition, non-null fits will have components \code{assign},
  and \code{qr} relating to the linear fit, for use by extractor
      functions such as \code{summary}.
}
\references{
  Croux, C., Dhaene, G. and Hoorelbeke, D. (2003)
  \emph{Robust standard errors for robust estimators},
  Discussion Papers Series 03.16, K.U. Leuven, CES.

  Koller, M. (2012)
  Nonsingular subsampling for S-estimators with categorical predictors,
  \emph{ArXiv e-prints} \url{https://arxiv.org/abs/1208.5595};
  extended version published as Koller and Stahel (2017), see
  \code{\link{lmrob.control}}.

  Koller, M. and Stahel, W.A. (2011)
  Sharpening Wald-type inference in robust regression for small samples.
  \emph{Computational Statistics & Data Analysis} \bold{55}(8), 2504--2515.

  Maronna, R. A., and Yohai, V. J. (2000)
  Robust regression with both continuous and categorical predictors.
  \emph{Journal of Statistical Planning and Inference} \bold{89}, 197--214.

  Rousseeuw, P.J. and Yohai, V.J. (1984)
  Robust regression by means of S-estimators,
  In \emph{Robust and Nonlinear Time Series},
  J. Franke, W. Härdle and R. D. Martin (eds.).
  Lectures Notes in Statistics 26, 256--272,
  Springer Verlag, New York.

  Salibian-Barrera, M. and Yohai, V.J. (2006)
  A fast algorithm for S-regression estimates,
  \emph{Journal of Computational and Graphical Statistics} \bold{15}(2), 414--427.
  \doi{10.1198/106186006X113629}

  Yohai, V.J. (1987)
  High breakdown-point and high efficiency estimates for regression.
  \emph{The Annals of Statistics} \bold{15}, 642--65.

  Yohai, V., Stahel, W.~A. and Zamar, R. (1991)
  A procedure for robust estimation and inference in linear regression;
  in Stahel and Weisberg (eds), \emph{Directions in Robust Statistics
    and Diagnostics}, Part II, Springer, New York, 365--374;
  \doi{10.1007/978-1-4612-4444-8_20}.
}
\author{(mainly:) Matias Salibian-Barrera and Manuel Koller}
\seealso{
  \code{\link{lmrob.control}};
  for the algorithms \code{\link{lmrob.S}}, \code{\link{lmrob.M.S}} and
  \code{\link{lmrob.fit}};
  and for methods,
  \code{\link{summary.lmrob}}, for the extra \dQuote{statistics},
  notably \eqn{R^2} (\dQuote{R squared});
  \code{\link{predict.lmrob}},
  \code{\link{print.lmrob}}, \code{\link{plot.lmrob}}, and
  \code{\link{weights.lmrob}}.
}
\examples{
data(coleman)
set.seed(0)
## Default for a very long time:
summary( m1 <- lmrob(Y ~ ., data=coleman) )

## Nowadays **strongly recommended** for routine use:
summary(m2 <- lmrob(Y ~ ., data=coleman, setting = "KS2014") )
##                                       ------------------

plot(residuals(m2) ~ weights(m2, type="robustness")) ##-> weights.lmrob()
abline(h=0, lty=3)

data(starsCYG, package = "robustbase")
## Plot simple data and fitted lines
plot(starsCYG)
  lmST <-    lm(log.light ~ log.Te, data = starsCYG)
(RlmST <- lmrob(log.light ~ log.Te, data = starsCYG))
abline(lmST, col = "red")
abline(RlmST, col = "blue")
## --> Least Sq.:/ negative slope  \\ robust: slope ~= 2.2 % checked in ../tests/lmrob-data.R
summary(RlmST) # -> 4 outliers; rest perfect
vcov(RlmST)
stopifnot(all.equal(fitted(RlmST),
                    predict(RlmST, newdata = starsCYG), tol = 1e-14))
## FIXME: setting = "KS2011"  or  setting = "KS2014"  **FAIL** here

##--- 'init' argument -----------------------------------
## 1)  string
set.seed(0)
m3 <- lmrob(Y ~ ., data=coleman, init = "S")
stopifnot(all.equal(m1[-18], m3[-18]))
## 2) function
initFun <- function(x, y, control, ...) { # no 'mf' needed
    init.S <- lmrob.S(x, y, control)
    list(coefficients=init.S$coef, scale = init.S$scale)
}
set.seed(0)
m4 <- lmrob(Y ~ ., data=coleman, method = "M", init = initFun)
## list
m5 <- lmrob(Y ~ ., data=coleman, method = "M",
            init = list(coefficients = m3$init$coef, scale = m3$scale))
stopifnot(all.equal(m4[-17], m5[-17]))
}
\keyword{robust}
\keyword{regression}