File: barNest.Rd

package info (click to toggle)
r-cran-plotrix 3.2-6-1
  • links: PTS, VCS
  • area: main
  • in suites: wheezy
  • size: 1,136 kB
  • sloc: makefile: 3
file content (129 lines) | stat: -rwxr-xr-x 7,239 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
\name{barNest}
\alias{barNest}
\title{Display a nested breakdown of numeric values}
\description{Breaks down the elements of a data frame by one or more
 categorical elements and displays the breakdown as a bar plot.}
\usage{
 barNest(formula=NULL,data=NULL,FUN=c("mean","sd"),ylim=NULL,
 main="",xlab="",ylab="",shrink=0.1,errbars=FALSE,col=NA,
 labelcex=1,lineht=NA,showall=TRUE,barlabels=NULL,showlabels=TRUE,mar=NULL,
 arrow.cap=NA,trueval=NA)
}
\arguments{
 \item{formula}{A formula with a numeric element of a data frame on the left and
  one or more categorical elements on the right.}
 \item{data}{A data frame containing the elements in \samp{formula}.}
 \item{FUN}{The functions to apply to x.}
 \item{ylim}{Optional y limits for the plot, usually necessary for counts.}
 \item{main}{Title for the plot.}
 \item{xlab,ylab}{Axis labels for the plot. The x axis label is typically blank}
 \item{shrink}{The proportion to shrink the width of the bars at each level.}
 \item{errbars}{Whether to display error bars on the lowest level of breakdown.}
 \item{col}{The colors to use to fill the bars. See Details.}
 \item{labelcex}{Character size for the group labels.}
 \item{lineht}{The height of a line of text in the lower margin of the plot in user
  units. This will be calculated by the function if a value is not passed.}
 \item{showall}{Whether to display bars for the entire breakdown.}
 \item{barlabels}{Optional group labels that may be useful if the factors used to
  break down the numeric variable are fairly long strings.}
 \item{showlabels}{Whether to display the labels below the bars.}
 \item{mar}{If not NULL, a four element vector to set the plot margins. If new
  margins are set, the user must reset the margins after the function exits.}
 \item{arrow.cap}{The width of the "cap" on error bars in user units,
  calculated on the basis of the number of bars in the final breakdown if NA.}
 \item{trueval}{If this is not NA, the call to \samp{brkdnNest} will return the
  proportions of the response variable that are equal to \samp{trueval}.
  See Details.}
}
\value{The summary list produced by brkdnNest.}
\details{
 \samp{barNest} displays a bar plot illustrating the hierarchic breakdown of
 the elements of a data frame. The breakdown is performed by \samp{brkdnNest}
 and the actual display is performed by \samp{drawNestedBars}. The heights of
 the bars will be proportional to the values returned by the first function in
 \samp{FUN}. If \samp{showall} is TRUE, the entire nested breakdown will be
 displayed. This can be useful in visualizing the relationship between groups
 and subgroups in a compact format.

 If \samp{trueval} is not NA and brkdnNest is called to calculate the values for
 the heights of the bars, the proportions of the dichotomous response variable
 that are equal to \samp{trueval} will be returned.

 A number of functions can be passed in the \samp{FUN} argument. Two functions,
 \samp{propbrk} and \samp{valid.n} that are provided can be used as bar heights,
 giving proportions and counts in each group and subgroup respectively.
 Binomial confidence limits can be added to the proportions returned by
 \samp{propbrk} with \samp{binciWl} and \samp{binciWu} as in the second last
 example.

 The colors of the bars are determined by \samp{col}. If \samp{showall} is FALSE,
 the user only need pass a vector of colors, usually the same length as the number
 of categories in the final (last on the right side) element in the formula. If
 \samp{showall} is TRUE and the user wants to color all of the bars, a list with
 as many elements as there are levels in the breakdown should be passed. Each
 element should be a vector of colors, again usually the same length as the number
 of categories. As the categorical variables are likely to be factors, it is
 important to remember that the colors must be in the correct order for the levels
 of the factors. When the levels are not in the default alphanumeric order, it is
 quite easy to get this wrong. As a \samp{barNest} plot with more than a few
 factors and levels in each factor is quite dense, easily distinguished colors
 for each level of the breakdown may be preferable. 
}
\author{Jim Lemon and Ofir Levy}
\references{
 Lemon, J. & Levy, O. (2011) barNest: Illustrating nested summary measures.
 Statistical Computing and Graphics Newsletter of the American Statistical
 Association, 21(2): 5-10.
}
\seealso{\link{brkdnNest}, \link{drawNestedBars}, superbarplot(UsingR)}
\examples{
 # recreate the Titanic data frame and show the three way breakdown
 titanic<-data.frame(
  class=c(rep("1st",325),rep("2nd",285),rep("3rd",706),rep("Crew",885)),
  age=c(rep("Adult",319),rep("Child",6),rep("Adult",261),rep("Child",24),
  rep("Adult",627),rep("Child",79),rep("Adult",885)),
  sex=c(rep("M",175),rep("F",144),rep("M",5),rep("F",1),
  rep("M",168),rep("F",93),rep("M",11),rep("F",13),
  rep("M",462),rep("F",165),rep("M",48),rep("F",31),
  rep("M",862),rep("F",23)),
  survived=c(rep("Yes",57),rep("No",118),rep("Yes",140),rep("No",4),rep("Yes",6),
  rep("Yes",14),rep("No",154),rep("Yes",80),rep("No",13),rep("Yes",24),
  rep("Yes",75),rep("No",387),rep("Yes",76),rep("No",89),
  rep("Yes",13),rep("No",35),rep("Yes",14),rep("No",17),
  rep("Yes",192),rep("No",670),rep("Yes",20),rep("No",3)))
 require(plotrix)
 titanic.colors<-list("gray90",c("#0000ff","#7700ee","#aa00cc","#dd00aa"),
  c("#ddcc00","#ee9900"),c("pink","lightblue"))
 barNest(survived~class+age+sex,titanic,col=titanic.colors,showall=TRUE,
  main="Titanic survival by class, age and sex",ylab="Proportion surviving",
  FUN="propbrk",shrink=0.15,trueval="Yes")
 # now show the actual numbers of passengers
 barNest(survived~class+age+sex,titanic,col=titanic.colors,showall=TRUE,
  main="Titanic passengers and crew by class, age and sex",ylab="Number",
  FUN="valid.n",shrink=0.15)
 # to see this properly displayed, start a wide plot window
 # x11(width=10)
 test.df<-data.frame(Age=rnorm(100,25,10),
  Sex=sample(c("Male","Female"),100,TRUE),
  Marital=sample(c("Div","Mar","Sing","Wid"),100,TRUE),
  Employ=sample(c("FT","PT","Un"),100,TRUE))
 test.col<-list(Overall="gray",Sex=c("pink","lightblue"),
  Marital=c("mediumpurple","orange","tan","lightgreen"),
  Employ=c("#1affd8","#caeecc","#ff90d0"))
 barNest(formula=Age~Sex+Marital+Employ,data=test.df,ylab="Mean age (years)",
  main="Mean age by subgroups",errbars=TRUE,col=test.col)
 # set up functions for 20th and 80th percentiles
 q20<-function(x,na.rm=TRUE) return(quantile(x,probs=0.2,na.rm=TRUE))
 q80<-function(x,na.rm=TRUE) return(quantile(x,probs=0.8,na.rm=TRUE))
 # show the asymmetric dispersion measures
 barNest(formula=Age~Sex+Marital+Employ,data=test.df,ylab="Mean age (years)",
  main="Use median and quantiles for dispersion",FUN=c("median","q80","q20"),
  errbars=TRUE,col=test.col)
 barNest(formula=Employ~Sex+Marital,data=test.df,ylab="Proportion unemployed",
  main="Show the proportion unemployed",FUN=c("propbrk","binciWu","binciWl"),
 errbars=TRUE,col=test.col,trueval="Un")
 barNest(formula=Age~Sex+Marital+Employ,data=test.df,ylab="Counts",
  main="Show the counts in subgroups (final level only)",FUN="valid.n",
  col=test.col,showall=FALSE,ylim=c(0,10))
}
\keyword{misc}