File: summaryRc.Rd

package info (click to toggle)
hmisc 4.2-0-1
  • links: PTS, VCS
  • area: main
  • in suites: bullseye, buster, sid
  • size: 3,332 kB
  • sloc: asm: 27,116; fortran: 606; ansic: 411; xml: 160; makefile: 2
file content (132 lines) | stat: -rw-r--r-- 5,659 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
\name{summaryRc}
\alias{summaryRc}
\title{Graphical Summarization of Continuous Variables Against a Response}
\description{
	\code{summaryRc} is a continuous version of \code{\link{summary.formula}}
	with \code{method='response'}.  It uses the \code{\link{plsmo}}
	function to compute the possibly stratified \code{\link{lowess}}
	nonparametric regression estimates, and plots them along with the data
	density, with selected quantiles of the overall distribution (over
	strata) of each \code{x} shown as arrows on top of the graph.  All the
	\code{x} variables must be numeric and continuous or nearly continuous.
}
\usage{
summaryRc(formula, data=NULL, subset=NULL,
          na.action=NULL, fun = function(x) x,
          na.rm = TRUE, ylab=NULL, ylim=NULL, xlim=NULL,
          nloc=NULL, datadensity=NULL,
          quant = c(0.05, 0.1, 0.25, 0.5, 0.75,
                    0.90, 0.95), quantloc=c('top','bottom'),
          cex.quant=.6, srt.quant=0,
          bpplot = c('none', 'top', 'top outside', 'top inside', 'bottom'),
          height.bpplot=0.08,
          trim=NULL, test = FALSE, vnames = c('labels', 'names'), \dots)
}
\arguments{
  \item{formula}{
    An \R formula with additive effects.  The \code{formula} may contain
		one or more invocations of the \code{stratify} function whose
		arguments are defined below.  This causes 
    the entire analysis to be stratified by cross-classifications of the
    combined list of stratification factors.  This stratification will be
    reflected as separate \code{lowess} curves.}
  \item{data}{
    name or number of a data frame.  Default is the current frame.
  }
  \item{subset}{
    a logical vector or integer vector of subscripts used to specify the
    subset of data to use in the analysis.  The default is to use all
    observations in the data frame.
  }
  \item{na.action}{
    function for handling missing data in the input data.  The default is
    a function defined here called \code{na.retain}, which keeps all
    observations for processing, with missing variables or not.
  }
  \item{fun}{
    function for transforming \code{lowess} estimates.  Default is the
		identity function.}
  \item{na.rm}{
    \code{TRUE} (the default) to exclude \code{NA}s before passing data to
    \code{fun} to compute statistics, \code{FALSE} otherwise.
  }
	\item{ylab}{\code{y}-axis label.  Default is label attribute of
		\code{y} variable, or its name.}
	\item{ylim}{\code{y}-axis limits.  By default each graph is scaled on
		its own.}
	\item{xlim}{a list with elements named as the variable names appearing
		on the \code{x}-axis, with each element being a 2-vector specifying
		lower and upper limits.  Any variable not appearing in the list will
		have its limits computed and possibly \code{trim}med.}
	\item{nloc}{location for sample size.  Specify \code{nloc=FALSE} to
		suppress, or \code{nloc=list(x=,y=)} where \code{x,y} are relative
		coordinates in the data window.  Default position is in the largest
		empty space.}
	\item{datadensity}{see \code{\link{plsmo}}.  Defaults to \code{TRUE}
		if there is a \code{stratify} variable, \code{FALSE} otherwise.}
  \item{quant}{
    vector of quantiles to use for summarizing the marginal distribution
		of each \code{x}.  These must be numbers between 0 and 1
    inclusive.  Use \code{NULL} to omit quantiles.
  }
	\item{quantloc}{specify \code{quantloc='bottom'} to place the quantile
		arrows at the bottom of each plot rather than at the top (the default)}
	\item{cex.quant}{character size for writing which quantiles are
		represented.  Set to \code{0} to suppress quantile labels.}
	\item{srt.quant}{angle for text for quantile labels}
	\item{bpplot}{if not \code{'none'} will draw extended box plot at
		location given by \code{bpplot}, and quantiles discussed above will
		be suppressed.  Specifying \code{bpplot='top'} is the same as
		specifying \code{bpplot='top inside'}.}
	\item{height.bpplot}{height in inches of the horizontal extended box plot}
	\item{trim}{The default is to plot from the 10th smallest to the 10th
		largest \code{x} if the number of non-NAs exceeds 200, otherwise to
		use the entire range of \code{x}.  Specify another quantile to use
		other limits, e.g., \code{trim=0.01} will use the 1st and 99th
		percentiles.}
  \item{test}{
    Set to \code{TRUE} to plot test statistics (not yet implemented).
  }
  \item{vnames}{
    By default, plots are usually labeled with variable labels
    (see the \code{label} and \code{sas.get} functions).  To use the shorter
    variable names, specify \code{vnames="names"}.
  }
  \item{\dots}{arguments passed to \code{\link{plsmo}}}
}
\value{no value is returned}
\author{
  Frank Harrell\cr
  Department of Biostatistics\cr
  Vanderbilt University\cr
  \email{f.harrell@vanderbilt.edu}
}
\seealso{
  \code{\link{plsmo}}, \code{\link{stratify}},
  \code{\link{label}}, \code{\link{formula}}, \code{\link{panel.bpplot}} 
}
\examples{
options(digits=3)
set.seed(177)
sex <- factor(sample(c("m","f"), 500, rep=TRUE))
age <- rnorm(500, 50, 5)
bp  <- rnorm(500, 120, 7)
units(age) <- 'Years'; units(bp) <- 'mmHg'
label(bp) <- 'Systolic Blood Pressure'
L <- .5*(sex == 'm') + 0.1 * (age - 50)
y <- rbinom(500, 1, plogis(L))
par(mfrow=c(1,2))
summaryRc(y ~ age + bp)
# For x limits use 1st and 99th percentiles to frame extended box plots
summaryRc(y ~ age + bp, bpplot='top', datadensity=FALSE, trim=.01)
summaryRc(y ~ age + bp + stratify(sex),
          label.curves=list(keys='lines'), nloc=list(x=.1, y=.05))
y2 <- rbinom(500, 1, plogis(L + .5))
Y <- cbind(y, y2)
summaryRc(Y ~ age + bp + stratify(sex),
          label.curves=list(keys='lines'), nloc=list(x=.1, y=.05))
}
\keyword{hplot}