1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293
|
\name{B_03_histogram}
\alias{histogram}
\alias{histogram.factor}
\alias{histogram.numeric}
\alias{histogram.formula}
\alias{densityplot}
\alias{densityplot.numeric}
\alias{densityplot.formula}
\alias{do.breaks}
\title{Histograms and Kernel Density Plots}
\usage{
histogram(x, data, \dots)
densityplot(x, data, \dots)
\method{histogram}{formula}(x,
data,
allow.multiple, outer = TRUE,
auto.key = FALSE,
aspect = "fill",
panel = lattice.getOption("panel.histogram"),
prepanel, scales, strip, groups,
xlab, xlim, ylab, ylim,
type = c("percent", "count", "density"),
nint = if (is.factor(x)) nlevels(x)
else round(log2(length(x)) + 1),
endpoints = extend.limits(range(as.numeric(x),
finite = TRUE), prop = 0.04),
breaks,
equal.widths = TRUE,
drop.unused.levels =
lattice.getOption("drop.unused.levels"),
\dots,
lattice.options = NULL,
default.scales = list(),
default.prepanel =
lattice.getOption("prepanel.default.histogram"),
subscripts,
subset)
\method{histogram}{numeric}(x, data = NULL, xlab, \dots)
\method{histogram}{factor}(x, data = NULL, xlab, \dots)
\method{densityplot}{formula}(x,
data,
allow.multiple = is.null(groups) || outer,
outer = !is.null(groups),
auto.key = FALSE,
aspect = "fill",
panel = lattice.getOption("panel.densityplot"),
prepanel, scales, strip, groups, weights,
xlab, xlim, ylab, ylim,
bw, adjust, kernel, window, width, give.Rkern,
n = 512, from, to, cut, na.rm,
drop.unused.levels =
lattice.getOption("drop.unused.levels"),
\dots,
lattice.options = NULL,
default.scales = list(),
default.prepanel =
lattice.getOption("prepanel.default.densityplot"),
subscripts,
subset)
\method{densityplot}{numeric}(x, data = NULL, xlab, \dots)
do.breaks(endpoints, nint)
}
\description{
Draw Histograms and Kernel Density Plots, possibly conditioned on
other variables.
}
\arguments{
\item{x}{
The object on which method dispatch is carried out.
For the \code{formula} method, \code{x} can be a formula of the form
\code{~ x | g1 * g2 * \dots}, indicating that histograms or kernel
density estimates of the \code{x} variable should be produced
conditioned on the levels of the (optional) variables \code{g1},
\code{g2}, \dots. \code{x} should be numeric (or possibly a factor
in the case of \code{histogram}), and each of \code{g1}, \code{g2},
\dots should be either factors or shingles.
As a special case, the right hand side of the formula can contain
more than one term separated by \sQuote{+} signs (e.g., \code{~ x1 +
x2 | g1 * g2}). What happens in this case is described in the
documentation for \code{\link{xyplot}}. Note that in either form,
all the terms in the formula must have the same length after
evaluation.
For the \code{numeric} and \code{factor} methods, \code{x} is the
variable whose histogram or kernel density estimate is drawn.
Conditioning is not allowed in these cases.
}
\item{data}{
For the \code{formula} method, an optional data source (usually a
data frame) in which variables are to be evaluated (see
\code{\link{xyplot}} for details). \code{data} should not be
specified for the other methods, and is ignored with a warning if it
is.
}
\item{type}{
A character string indicating the type of histogram that is to be
drawn. \code{"percent"} and \code{"count"} give relative frequency
and frequency histograms respectively, and can be misleading when
breakpoints are not equally spaced. \code{"density"} produces a
density histogram.
\code{type} defaults to \code{"density"} when the breakpoints are
unequally spaced, and when \code{breaks} is \code{NULL} or a
function, and to \code{"percent"} otherwise.
}
\item{nint}{
An integer specifying the number of histogram bins, applicable only
when \code{breaks} is unspecified or \code{NULL} in the call.
Ignored when the variable being plotted is a factor.
}
\item{endpoints}{
A numeric vector of length 2 indicating the range of x-values that
is to be covered by the histogram. This applies only when
\code{breaks} is unspecified and the variable being plotted is not a
factor. In \code{do.breaks}, this specifies the interval that is to
be divided up.
}
\item{breaks}{
Usually a numeric vector of length (number of bins + 1) defining the
breakpoints of the bins. Note that when breakpoints are not equally
spaced, the only value of \code{type} that makes sense is
\code{"density"}.
When \code{breaks} is unspecified, the value of
\code{lattice.getOption("histogram.breaks")} is first checked. If
this value is \code{NULL}, then the default is to use
\preformatted{
breaks = seq_len(1 + nlevels(x)) - 0.5
}
when \code{x} is a factor, and
\preformatted{
breaks = do.breaks(endpoints, nint)
}
otherwise. Breakpoints calculated in such a manner are used in all
panels. If the retrieved value is not \code{NULL}, or if
\code{breaks} is explicitly specified, it affects the display in
each panel independently. Valid values are those accepted as the
\code{breaks} argument in \code{\link{hist}}. In particular, this
allows specification of \code{breaks} as an integer giving the
number of bins (similar to \code{nint}), as a character string
denoting a method, or as a function.
When specified explicitly, a special value of \code{breaks} is
\code{NULL}, in which case the number of bins is determined by
\code{nint} and then breakpoints are chosen according to the value
of \code{equal.widths}.
}
\item{equal.widths}{
A logical flag, relevant only when \code{breaks=NULL}. If
\code{TRUE}, equally spaced bins will be selected, otherwise,
approximately equal area bins will be selected (typically producing
unequally spaced breakpoints).
}
\item{n}{
Integer, giving the number of points at which the kernel density is
to be evaluated. Passed on as an argument to \code{\link{density}}.
}
\item{panel}{
A function, called once for each panel, that uses the packet (subset
of panel variables) corresponding to the panel to create a display.
The default panel functions \code{\link{panel.histogram}} and
\code{\link{panel.densityplot}} are documented separately, and have
arguments that can be used to customize their output in various ways.
Such arguments can usually be directly supplied to the high-level
function.
}
\item{allow.multiple, outer}{ See \code{\link{xyplot}}. }
\item{auto.key}{ See \code{\link{xyplot}}. }
\item{aspect}{ See \code{\link{xyplot}}. }
\item{prepanel}{ See \code{\link{xyplot}}. }
\item{scales}{ See \code{\link{xyplot}}. }
\item{strip}{ See \code{\link{xyplot}}. }
\item{groups}{
See \code{\link{xyplot}}. Note that the default panel function for
\code{histogram} does not support grouped displays, whereas the one
for \code{densityplot} does.
}
\item{xlab, ylab}{ See \code{\link{xyplot}}. }
\item{xlim, ylim}{ See \code{\link{xyplot}}. }
\item{drop.unused.levels}{ See \code{\link{xyplot}}. }
\item{lattice.options}{ See \code{\link{xyplot}}. }
\item{default.scales}{ See \code{\link{xyplot}}. }
\item{subscripts}{ See \code{\link{xyplot}}. }
\item{subset}{ See \code{\link{xyplot}}. }
\item{default.prepanel}{
Fallback prepanel function. See \code{\link{xyplot}}.
}
\item{weights}{ numeric vector of weights for the density
calculations, evaluated in the non-standard manner used for
\code{groups} and terms in the formula, if any. If this is
specified, it is subsetted using \code{subscripts} inside the panel
function to match it to the corresponding \code{x} values.
At the time of writing, \code{weights} do not work in conjunction
with an extended formula specification (this is not too hard to fix,
so just bug the maintainer if you need this feature).
}
\item{bw, adjust, width}{
Arguments controlling bandwidth. Passed on as arguments to
\code{\link{density}}.
}
\item{kernel, window}{
The choice of kernel. Passed on as arguments to
\code{\link{density}}.
}
\item{give.Rkern}{
Logical flag, passed on as argument to \code{\link{density}}.
This argument is made available only for ease of implementation, and
will produce an error if \code{TRUE}.
}
\item{from, to, cut}{
Controls range over which density is evaluated. Passed on as
arguments to \code{\link{density}}.
}
\item{na.rm}{
Logical flag specifying whether \code{NA} values should be ignored.
Passed on as argument to \code{\link{density}}, but unlike in
\code{density}, the default is \code{TRUE}.
}
\item{\dots}{ Further arguments. See corresponding entry in
\code{\link{xyplot}} for non-trivial details. }
}
\value{
An object of class \code{"trellis"}. The
\code{\link[lattice:update.trellis]{update}} method can be used to
update components of the object and the
\code{\link[lattice:print.trellis]{print}} method (usually called by
default) will plot it on an appropriate plotting device.
}
\details{
\code{histogram} draws Conditional Histograms, and \code{densityplot}
draws Conditional Kernel Density Plots. The default panel function
uses the \code{\link{density}} function to compute the density
estimate, and all arguments accepted by \code{density} can be
specified in the call to \code{densityplot} to control the output.
See documentation of \code{density} for details.
These and all other high-level Trellis functions have several
arguments in common. These are extensively documented only in the
help page for \code{xyplot}, which should be consulted to learn more
detailed usage.
\code{do.breaks} is a utility function that calculates breakpoints
given an interval and the number of pieces to break it into.
}
\note{
The form of the arguments accepted by the default panel function
\code{panel.histogram} is different from that in S-PLUS. Whereas
S-PLUS calculates the heights inside \code{histogram} and passes only
the breakpoints and the heights to the panel function, \pkg{lattice}
simply passes the original variable \code{x} along with the
breakpoints. This approach is more flexible; see the example below
with an estimated density superimposed over the histogram.
}
\references{
Sarkar, Deepayan (2008) \emph{Lattice: Multivariate Data
Visualization with R}, Springer.
\url{http://lmdvr.r-forge.r-project.org/}
}
\seealso{
\code{\link{xyplot}},
\code{\link{panel.histogram}},
\code{\link{density}},
\code{\link{panel.densityplot}},
\code{\link{panel.mathdensity}},
\code{\link{Lattice}}
}
\author{ Deepayan Sarkar \email{Deepayan.Sarkar@R-project.org}}
\examples{
require(stats)
histogram( ~ height | voice.part, data = singer, nint = 17,
endpoints = c(59.5, 76.5), layout = c(2,4), aspect = 1,
xlab = "Height (inches)")
histogram( ~ height | voice.part, data = singer,
xlab = "Height (inches)", type = "density",
panel = function(x, ...) {
panel.histogram(x, ...)
panel.mathdensity(dmath = dnorm, col = "black",
args = list(mean=mean(x),sd=sd(x)))
} )
densityplot( ~ height | voice.part, data = singer, layout = c(2, 4),
xlab = "Height (inches)", bw = 5)
}
\keyword{hplot}
|