File: histogram.Rd

package info (click to toggle)
lattice 0.20-41-1
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 1,988 kB
  • sloc: ansic: 357; makefile: 2
file content (293 lines) | stat: -rw-r--r-- 11,924 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
\name{B_03_histogram}
\alias{histogram}
\alias{histogram.factor}
\alias{histogram.numeric}
\alias{histogram.formula}
\alias{densityplot}
\alias{densityplot.numeric}
\alias{densityplot.formula}
\alias{do.breaks}
\title{Histograms and Kernel Density Plots}
\usage{
histogram(x, data, \dots)
densityplot(x, data, \dots)
\method{histogram}{formula}(x,
          data,
          allow.multiple, outer = TRUE,
          auto.key = FALSE,
          aspect = "fill",
          panel = lattice.getOption("panel.histogram"),
          prepanel, scales, strip, groups,
          xlab, xlim, ylab, ylim,
          type = c("percent", "count", "density"),
          nint = if (is.factor(x)) nlevels(x)
          else round(log2(length(x)) + 1),
          endpoints = extend.limits(range(as.numeric(x),
                          finite = TRUE), prop = 0.04),
          breaks,
          equal.widths = TRUE,
          drop.unused.levels =
              lattice.getOption("drop.unused.levels"),
          \dots,
          lattice.options = NULL,
          default.scales = list(),
          default.prepanel =
              lattice.getOption("prepanel.default.histogram"),
          subscripts,
          subset)

\method{histogram}{numeric}(x, data = NULL, xlab, \dots)
\method{histogram}{factor}(x, data = NULL, xlab, \dots)

\method{densityplot}{formula}(x,
            data,
            allow.multiple = is.null(groups) || outer,
            outer = !is.null(groups),
            auto.key = FALSE,
            aspect = "fill",
            panel = lattice.getOption("panel.densityplot"),
            prepanel, scales, strip, groups, weights,
            xlab, xlim, ylab, ylim,
            bw, adjust, kernel, window, width, give.Rkern,
            n = 512, from, to, cut, na.rm,
            drop.unused.levels =
                lattice.getOption("drop.unused.levels"),
            \dots,
            lattice.options = NULL,
            default.scales = list(),
            default.prepanel =
                lattice.getOption("prepanel.default.densityplot"),
            subscripts,
            subset)
\method{densityplot}{numeric}(x, data = NULL, xlab, \dots)

do.breaks(endpoints, nint)
}
\description{
  Draw Histograms and Kernel Density Plots, possibly conditioned on
  other variables.
}
\arguments{
  \item{x}{
    The object on which method dispatch is carried out.
    
    For the \code{formula} method, \code{x} can be a formula of the form
    \code{~ x | g1 * g2 * \dots}, indicating that histograms or kernel
    density estimates of the \code{x} variable should be produced
    conditioned on the levels of the (optional) variables \code{g1},
    \code{g2}, \dots.  \code{x} should be numeric (or possibly a factor
    in the case of \code{histogram}), and each of \code{g1}, \code{g2},
    \dots should be either factors or shingles.
  
    As a special case, the right hand side of the formula can contain
    more than one term separated by \sQuote{+} signs (e.g., \code{~ x1 +
    x2 | g1 * g2}).  What happens in this case is described in the
    documentation for \code{\link{xyplot}}.  Note that in either form,
    all the terms in the formula must have the same length after
    evaluation.
    
    For the \code{numeric} and \code{factor} methods, \code{x} is the
    variable whose histogram or kernel density estimate is drawn.
    Conditioning is not allowed in these cases.

  }
  \item{data}{
    For the \code{formula} method, an optional data source (usually a
    data frame) in which variables are to be evaluated (see
    \code{\link{xyplot}} for details).  \code{data} should not be
    specified for the other methods, and is ignored with a warning if it
    is.
  }
  \item{type}{
    A character string indicating the type of histogram that is to be
    drawn.  \code{"percent"} and \code{"count"} give relative frequency
    and frequency histograms respectively, and can be misleading when
    breakpoints are not equally spaced. \code{"density"} produces a
    density histogram.

    \code{type} defaults to \code{"density"} when the breakpoints are
    unequally spaced, and when \code{breaks} is \code{NULL} or a
    function, and to \code{"percent"} otherwise.
  }
  \item{nint}{
    An integer specifying the number of histogram bins, applicable only
    when \code{breaks} is unspecified or \code{NULL} in the call.
    Ignored when the variable being plotted is a factor.
  }
  \item{endpoints}{
    A numeric vector of length 2 indicating the range of x-values that
    is to be covered by the histogram.  This applies only when
    \code{breaks} is unspecified and the variable being plotted is not a
    factor.  In \code{do.breaks}, this specifies the interval that is to
    be divided up.
  }
  \item{breaks}{
    Usually a numeric vector of length (number of bins + 1) defining the
    breakpoints of the bins.  Note that when breakpoints are not equally
    spaced, the only value of \code{type} that makes sense is
    \code{"density"}.

    When \code{breaks} is unspecified, the value of
    \code{lattice.getOption("histogram.breaks")} is first checked.  If
    this value is \code{NULL}, then the default is to use
    \preformatted{
      breaks = seq_len(1 + nlevels(x)) - 0.5
    }
    when \code{x} is a factor, and 
    \preformatted{
      breaks = do.breaks(endpoints, nint)
    }
    otherwise.  Breakpoints calculated in such a manner are used in all
    panels.  If the retrieved value is not \code{NULL}, or if
    \code{breaks} is explicitly specified, it affects the display in
    each panel independently.  Valid values are those accepted as the
    \code{breaks} argument in \code{\link{hist}}.  In particular, this
    allows specification of \code{breaks} as an integer giving the
    number of bins (similar to \code{nint}), as a character string
    denoting a method, or as a function.

    When specified explicitly, a special value of \code{breaks} is
    \code{NULL}, in which case the number of bins is determined by
    \code{nint} and then breakpoints are chosen according to the value
    of \code{equal.widths}.
  }
  \item{equal.widths}{
    A logical flag, relevant only when \code{breaks=NULL}.  If
    \code{TRUE}, equally spaced bins will be selected, otherwise,
    approximately equal area bins will be selected (typically producing
    unequally spaced breakpoints).
  }
  \item{n}{
    Integer, giving the number of points at which the kernel density is
    to be evaluated.  Passed on as an argument to \code{\link{density}}.
  }
  \item{panel}{
    A function, called once for each panel, that uses the packet (subset
    of panel variables) corresponding to the panel to create a display.
    The default panel functions \code{\link{panel.histogram}} and
    \code{\link{panel.densityplot}} are documented separately, and have
    arguments that can be used to customize their output in various ways.
    Such arguments can usually be directly supplied to the high-level
    function.
  }
  \item{allow.multiple, outer}{ See \code{\link{xyplot}}. }
  \item{auto.key}{ See \code{\link{xyplot}}. }
  \item{aspect}{ See \code{\link{xyplot}}. }
  \item{prepanel}{ See \code{\link{xyplot}}. }
  \item{scales}{ See \code{\link{xyplot}}. }
  \item{strip}{ See \code{\link{xyplot}}. }
  \item{groups}{
    See \code{\link{xyplot}}.  Note that the default panel function for
    \code{histogram} does not support grouped displays, whereas the one
    for \code{densityplot} does.
  }
  \item{xlab, ylab}{ See \code{\link{xyplot}}. }
  \item{xlim, ylim}{ See \code{\link{xyplot}}. }
  \item{drop.unused.levels}{ See \code{\link{xyplot}}. }
  \item{lattice.options}{ See \code{\link{xyplot}}. }
  \item{default.scales}{ See \code{\link{xyplot}}. }
  \item{subscripts}{ See \code{\link{xyplot}}. }
  \item{subset}{ See \code{\link{xyplot}}. }
  \item{default.prepanel}{
    Fallback prepanel function.  See \code{\link{xyplot}}.
  }
  \item{weights}{ Numeric vector of weights for the density
    calculations, evaluated in the non-standard manner used for
    \code{groups} and terms in the formula, if any.  If this is
    specified, it is subsetted using \code{subscripts} inside the panel
    function to match it to the corresponding \code{x} values.

    At the time of writing, \code{weights} do not work in conjunction
    with an extended formula specification (this is not too hard to fix,
    so just bug the maintainer if you need this feature).
  }
  \item{bw, adjust, width}{
    Arguments controlling bandwidth.  Passed on as arguments to
    \code{\link{density}}.
  }
  \item{kernel, window}{
    The choice of kernel.  Passed on as arguments to
    \code{\link{density}}.  
  }
  \item{give.Rkern}{
    Logical flag, passed on as argument to \code{\link{density}}.
    This argument is made available only for ease of implementation, and
    will produce an error if \code{TRUE}.
  }
  \item{from, to, cut}{ 
    Controls range over which density is evaluated.  Passed on as
    arguments to \code{\link{density}}.
  }
  \item{na.rm}{
    Logical flag specifying whether \code{NA} values should be ignored.
    Passed on as argument to \code{\link{density}}, but unlike in
    \code{density}, the default is \code{TRUE}.
  }
  \item{\dots}{ Further arguments.  See corresponding entry in
    \code{\link{xyplot}} for non-trivial details.  }
}
\value{
  An object of class \code{"trellis"}. The
  \code{\link[lattice:update.trellis]{update}} method can be used to
  update components of the object and the
  \code{\link[lattice:print.trellis]{print}} method (usually called by
  default) will plot it on an appropriate plotting device.
}
\details{
  \code{histogram} draws Conditional Histograms, and \code{densityplot}
  draws Conditional Kernel Density Plots.  The default panel function
  uses the \code{\link{density}} function to compute the density
  estimate, and all arguments accepted by \code{density} can be
  specified in the call to \code{densityplot} to control the output.
  See documentation of \code{density} for details.
  
  These and all other high-level Trellis functions have several
  arguments in common.  These are documented in detail only in the
  help page for \code{xyplot}, which should be consulted for more
  detailed usage information.

  \code{do.breaks} is a utility function that calculates breakpoints
  given an interval and the number of pieces to break it into.
}
\note{
  The form of the arguments accepted by the default panel function
  \code{panel.histogram} is different from that in S-PLUS. Whereas
  S-PLUS calculates the heights inside \code{histogram} and passes only
  the breakpoints and the heights to the panel function, \pkg{lattice}
  simply passes the original variable \code{x} along with the
  breakpoints. This approach is more flexible; see the example below
  with an estimated density superimposed over the histogram.
}

\references{
  Sarkar, Deepayan (2008) \emph{Lattice: Multivariate Data
    Visualization with R}, Springer.
  \url{http://lmdvr.r-forge.r-project.org/}
}

\seealso{
  \code{\link{xyplot}},
  \code{\link{panel.histogram}},
  \code{\link{density}},
  \code{\link{panel.densityplot}},
  \code{\link{panel.mathdensity}},
  \code{\link{Lattice}} 
}
\author{ Deepayan Sarkar \email{Deepayan.Sarkar@R-project.org}}
\examples{
require(stats)
histogram( ~ height | voice.part, data = singer, nint = 17,
          endpoints = c(59.5, 76.5), layout = c(2,4), aspect = 1,
          xlab = "Height (inches)")

histogram( ~ height | voice.part, data = singer,
          xlab = "Height (inches)", type = "density",
          panel = function(x, ...) {
              panel.histogram(x, ...)
              panel.mathdensity(dmath = dnorm, col = "black",
                                args = list(mean=mean(x),sd=sd(x)))
          } )

densityplot( ~ height | voice.part, data = singer, layout = c(2, 4),  
            xlab = "Height (inches)", bw = 5)
}
\keyword{hplot}