File: salinity.Rd

package info (click to toggle)
robustbase 0.99-4-1-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 4,552 kB
  • sloc: fortran: 3,245; ansic: 3,243; sh: 15; makefile: 2
file content (76 lines) | stat: -rw-r--r-- 2,989 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
\name{salinity}
\alias{salinity}
\docType{data}
\title{Salinity Data}
\description{
  This is a data set consisting of measurements of water salinity (i.e.,
  its salt concentration) and river discharge taken in North Carolina's
  Pamlico Sound, recording some bi-weekly averages in March, April,
  and May from 1972 to 1977.  This dataset was listed by Ruppert and
  Carroll (1980).  In Carroll and Ruppert (1985) the physical background of the
  data is described.  They indicated that observations 5 and 16
  correspond to periods of very heavy discharge and showed that the
  discrepant observation 5 was masked by observations 3 and 16, i.e.,
  only after deletion of these observations was it possible to identify
  the influential observation 5.

  This data set is a prime example of the \emph{masking effect}.
}
\usage{data(salinity, package="robustbase")}
\format{
  A data frame with 28 observations on the following 4 variables
  (in parentheses are the names used in the 1980 reference).
  \describe{
    \item{\code{X1}:}{Lagged Salinity  (\sQuote{SALLAG})}
    \item{\code{X2}:}{Trend  (\sQuote{TREND})}
    \item{\code{X3}:}{Discharge  (\sQuote{H2OFLOW})}
    \item{\code{Y}:}{Salinity   (\sQuote{SALINITY})}
  }
}
\note{The \CRANpkg{boot} package contains another version of this salinity
  data set, also attributed to Ruppert and Carroll (1980), but with two
  clear transcription errors, see the examples.
}
\source{
 P. J. Rousseeuw and A. M. Leroy (1987)
 \emph{Robust Regression and Outlier Detection};
 Wiley, p.82, table 5.

 Ruppert, D. and Carroll, R.J. (1980)
 Trimmed least squares estimation in the linear model.
 \emph{JASA} \bold{75}, 828--838; table 3, p.835.

 Carroll, R.J. and Ruppert, D. (1985)
 Transformations in regression: A robust analysis.
 \emph{Technometrics} \bold{27}, 1--12.
}
\examples{
## Fit Y on all other columns: least squares via lm(), then two alternatives,
## MASS::rlm() and ltsReg(), so that the three summaries can be compared.
data(salinity)
summary(lm.sali  <-        lm(Y ~ . , data = salinity))
summary(rlm.sali <- MASS::rlm(Y ~ . , data = salinity))
summary(lts.sali <-    ltsReg(Y ~ . , data = salinity))

## covMcd() on the three predictor columns (1:3); then ask plot() for
## its "tolEllipsePlot" display.
salinity.x <- data.matrix(salinity[, 1:3])
c_sal <- covMcd(salinity.x)
plot(c_sal, "tolEllipsePlot")

## Connection with boot package's version :
if(requireNamespace("boot")) { ## 'always'
 ## NB: inside the braces only the value of the last expression would be
 ##     auto-printed, hence every result to be shown is print()ed explicitly.
 print( head(boot.sal <- boot::salinity        ) )
 print( head(robb.sal <- salinity [, c(4, 1:3)]) ) # difference: has one digit more
 ## Otherwise the same ?
 ## (columns reordered above to put Y first; now use the same dimnames)
 dimnames(robb.sal) <- dimnames(boot.sal)
 ## apart from the 4th column, they are "identical":
 stopifnot( all.equal(boot.sal[, -4], robb.sal[, -4], tolerance = 1e-15) )

 ## But the discharge ('X3', 'dis' or 'H2OFLOW')  __differs__ in two places:
 plot(cbind(robustbase = robb.sal[,4], boot = boot.sal[,4]))
 abline(0,1, lwd=3, col=adjustcolor("red", 1/4))
 D.sal <- robb.sal[,4] - boot.sal[,4]
 stem(D.sal)
 print( which(abs(D.sal) > 0.01) ) ## 2 8
 ## *two* typos (=> difference ~= 1) in the version of 'boot': obs. 2 & 8 !!!
 print( cbind(robb = robb.sal[,4], boot = boot.sal[,4], D.sal) )
}# boot
}
\keyword{datasets}