File: adjbox.Rd

package info (click to toggle)
robustbase 0.99-4-1-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 4,552 kB
  • sloc: fortran: 3,245; ansic: 3,243; sh: 15; makefile: 2
file content (169 lines) | stat: -rw-r--r-- 7,835 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
\name{adjbox}
\title{Plot an Adjusted Boxplot for Skew Distributions}
\alias{adjbox}
\alias{adjbox.default}
\alias{adjbox.formula}
\description{
  Produces boxplots adjusted for skewed distributions as proposed in
  Hubert and Vandervieren (2008).
}
\usage{
adjbox(x, \dots)

\method{adjbox}{formula}(formula, data = NULL, \dots, subset, na.action = NULL)

\method{adjbox}{default}(x, \dots, range = 1.5, doReflect = FALSE,
        width = NULL, varwidth = FALSE,
        notch = FALSE, outline = TRUE, names, plot = TRUE,
        border = par("fg"), col = NULL, log = "",
        pars = list(boxwex = 0.8, staplewex = 0.5, outwex = 0.5),
        horizontal = FALSE, add = FALSE, at = NULL)
}
\arguments{
    \item{formula}{a formula, such as \code{y ~ grp}, where \code{y} is a
    numeric vector of data values to be split into groups according to
    the grouping variable \code{grp} (usually a factor).}
  \item{data}{a data.frame (or list) from which the variables in
    \code{formula} should be taken.}
  \item{subset}{an optional vector specifying a subset of observations
    to be used for plotting.}
  \item{na.action}{a function which indicates what should happen
    when the data contain \code{NA}s.  The default is to ignore missing
    values in either the response or the group.}
  \item{x}{for specifying data from which the boxplots are to be
    produced. Either a numeric vector, or a single list containing such
    vectors. Additional unnamed arguments specify further data
    as separate vectors (each corresponding to a component boxplot).
    \code{\link{NA}}s are allowed in the data.}
  \item{\dots}{For the \code{formula} method, named arguments to be passed to
    the default method.

    For the default method, unnamed arguments are additional data
    vectors (unless \code{x} is a list when they are ignored),
    and named arguments are arguments and graphical parameters to be
    passed to \code{\link{bxp}} in addition to the ones
    given by argument \code{pars} (and override those in \code{pars}).
  }
  \item{range}{this determines how far the plot whiskers extend out
    from the box, and is simply passed as argument \code{coef} to
    \code{\link{adjboxStats}()}.  If \code{range} is positive, the
    whiskers extend to the most extreme data point which is no more than
    \code{range} times the interquartile range from the box, where for
    the adjusted boxplot this distance is additionally modified
    according to the medcouple, see \code{\link{adjboxStats}}.  A value
    of zero causes the whiskers to extend to the data extremes.}
  \item{doReflect}{logical indicating if the medcouple (MC) should also
    be computed on the \emph{reflected} sample \code{-x}, and the
    results be averaged; see \code{\link{mc}}.}
  \item{width}{a vector giving the relative widths of the boxes making
    up the plot.}
  \item{varwidth}{if \code{varwidth} is \code{TRUE}, the boxes are
    drawn with widths proportional to the square-roots of the number
    of observations in the groups.}
  \item{notch}{if \code{notch} is \code{TRUE}, a notch is drawn in
    each side of the boxes.  If the notches of two plots do not
    overlap this is \sQuote{strong evidence} that the two medians differ
    (Chambers \emph{et al.}, 1983, p. 62).  See \code{\link{boxplot.stats}}
    for the calculations used.}
  \item{outline}{if \code{outline} is not true, the outliers are
    not drawn (as points whereas S+ uses lines).}% the argument name is most ugly but S+ compatible
  \item{names}{group labels which will be printed under each boxplot.}
  \item{boxwex}{a scale factor to be applied to all boxes.  When there
    are only a few groups, the appearance of the plot can be improved
    by making the boxes narrower.}
  \item{staplewex}{staple line width expansion, proportional to box
    width.}
  \item{outwex}{outlier line width expansion, proportional to box
    width.}
  \item{plot}{if \code{TRUE} (the default) then a boxplot is
    produced.  If not, the summaries which the boxplots are based on
    are returned.}
  \item{border}{an optional vector of colors for the outlines of the
    boxplots.  The values in \code{border} are recycled if the
    length of \code{border} is less than the number of plots.}
  \item{col}{if \code{col} is non-null it is assumed to contain colors
    to be used to colour the bodies of the box plots. By default they
    are in the background colour.}
  \item{log}{character indicating if x or y or both coordinates should
    be plotted in log scale.}
  \item{pars}{a list of (potentially many) more graphical parameters,
    e.g., \code{boxwex} or \code{outpch}; these are passed to
    \code{\link{bxp}} (if \code{plot} is true); for details, see there.}
  \item{horizontal}{logical indicating if the boxplots should be
    horizontal; default \code{FALSE} means vertical boxes.}
  \item{add}{logical, if true \emph{add} boxplot to current plot.}
  \item{at}{numeric vector giving the locations where the boxplots should
    be drawn, particularly when \code{add = TRUE};
    defaults to \code{1:n} where \code{n} is the number of boxes.}
}
\details{
  The generic function \code{adjbox} currently has a default method
  (\code{adjbox.default}) and a formula interface (\code{adjbox.formula}).

  If multiple groups are supplied either as multiple arguments or via a
  formula, parallel boxplots will be plotted, in the order of the
  arguments or the order of the levels of the factor (see
  \code{\link{factor}}).

  Missing values are ignored when forming boxplots.

  Extremes of the upper and lower whiskers of the adjusted boxplots are
  computed using the medcouple (\code{\link{mc}()}), a robust measure of
  skewness.  For details, see \code{\link{adjboxStats}} and the
  reference below.
}
\value{
  A \code{\link{list}} with the following components:

  \item{stats}{a matrix, each column contains the extreme of the lower
    whisker, the lower hinge, the median, the upper hinge and the extreme of
    the upper whisker for one group/plot.  If all the inputs have the same
    class attribute, so will this component.}
  \item{n}{a vector with the number of observations in each group.}
  \item{conf}{a matrix where each column contains the lower and upper
    extremes of the notch.}
  \item{out}{the values of any data points which lie beyond the extremes
    of the whiskers.}
  \item{group}{a vector of the same length as \code{out} whose elements
    indicate to which group the outlier belongs.}
  \item{names}{a vector of names for the groups.}
}
\references{
%% Hubert, M. and Vandervieren, E. (2006)
%% \emph{An Adjusted Boxplot for Skewed Distributions},
%% Technical Report TR-06-11, KU Leuven, Section of Statistics, Leuven.
%% \url{http://wis.kuleuven.be/stat/robust/Papers/TR0611.pdf}

  Hubert, M. and Vandervieren, E. (2008).
  An adjusted boxplot for skewed distributions,
  \emph{Computational Statistics and Data Analysis} \bold{52}, 5186--5201.
  \doi{10.1016/j.csda.2007.11.008}
}
\author{ R Core Development Team, slightly adapted by Tobias Verbeke }
\note{ The code and documentation are only slight modifications of those of
  \code{\link{boxplot.default}}, \code{boxplot.formula} and
  \code{\link{boxplot.stats}}.
}
\seealso{The medcouple, \code{\link{mc}}; \code{\link{boxplot}}.
}
\examples{
if(require("boot")) {
 ### Hubert and Vandervieren (2008), Fig. 5.%(2006): p. 10, Fig. 4.
 data(coal, package = "boot")
 coaldiff <- diff(coal$date)
 op <- par(mfrow = c(1,2))
 boxplot(coaldiff, main = "Original Boxplot")
 adjbox(coaldiff, main  = "Adjusted Boxplot")
 par(op)
}

### Hubert and Vandervieren (2008), p. 11, Fig. 7a -- enhanced
op <- par(mfrow = c(2,2), mar = c(1,3,3,1), oma = c(0,0,3,0))
with(condroz, {
 boxplot(Ca, main = "Original Boxplot")
 adjbox (Ca, main = "Adjusted Boxplot")
 boxplot(Ca, main = "Original Boxplot [log]", log = "y")
 adjbox (Ca, main = "Adjusted Boxplot [log]", log = "y")
})
mtext("'Ca' from data(condroz)",
      outer=TRUE, font = par("font.main"), cex = 2)
par(op)
}
\keyword{hplot}