1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362
|
\name{raw.means.plot}
\alias{raw.means.plot}
\alias{raw.means.plot2}
\title{
raw.means.plot: Raw-Means Plots for Experimental Designs
}
\description{
raw.means.plot is a function for visualizing results of experimental designs
with up to two factors. It plots both raw data (background) and factor/cell
means (foreground) to provide a more accurate visualization of the underlying
distribution.
}
\usage{
raw.means.plot(data, col.offset = 2, col.x = 3, col.value = 4, na.rm = FALSE,
avoid.overlap = c("y", "x", "both"), y.factor = 1, y.amount = NULL,
x.amount = 0.05, pch = 21:25, lty = 1:5, bg.b.col = "darkgrey",
bg.f.col = NULL, fg.b.col = "black",fg.f.col = "black", type = "o",
pt.cex = 1, lwd = 1, xlab = "", ylab = "", ylim, max.offset = 0.2,
xaxis = TRUE, x.labels, xaxt = "n", plot = TRUE, legend = TRUE, mar = NULL,
reset.mar = TRUE, l.pos, yjust = 0.5, l.bty = "n", l.adj = c(0, 0.5), ...)
raw.means.plot2(data, col.id, col.offset, col.x, col.value,
fun.aggregate = "mean", ...)
}
\arguments{
\item{data}{
a \samp{data.frame} in long format (i.e., each datapoint one row,
see \samp{\link{reshape}} or the reshape package) that contains at least
three columns: one column coding the first factor (\samp{col.offset}), one
column coding the second factor (\samp{col.x}), and one column containing
the values (\samp{col.value}).
}
\item{col.id}{
a \samp{character} scalar specifying the name of the id column, i.e., the
column identifying the experimental unit (only for \samp{raw.means.plot2}).
}
\item{col.offset}{
a \samp{character} or \samp{numeric} (only \samp{raw.means.plot}) scalar,
specifying either name or number of the column coding the different lines
(the offset or first factor). Default is 2.
}
\item{col.x}{
a \samp{character} or \samp{numeric} (only \samp{raw.means.plot}) scalar,
specifying either name or number of the column coding the x-axis factor.
Default is 3.
}
\item{col.value}{
a \samp{character} or \samp{numeric} (only \samp{raw.means.plot}) scalar,
specifying either name or number of the data column. Default is 4.
}
\item{na.rm}{
\samp{logical} indicating whether \samp{NA} values should be stripped before
the computation proceeds. Default is \samp{FALSE}. Throws an error message
if FALSE and NAs are encountered.
}
\item{avoid.overlap}{
character. What should happen to datapoints within one cell of the two
factors that have the same value.
\itemize{
\item \samp{"y"} (the default) \link{jitter} is added so that
overlapping points are distinguishable on the \strong{y}-axis
\item \samp{"x"} \link{jitter} is added so that overlapping points
are distinguishable on the \strong{x}-axis
\item \samp{"both"} \link{jitter} is added so that overlapping points
are distinguishable on both the \strong{y}- and the \strong{x}-axis.
\item anything else. No jitter is added.
}
}
\item{y.factor}{
\samp{factor} for controlling the amount of jitter on the y-axis
(will be passed to \link{jitter}).
}
\item{y.amount}{
\samp{amount} for controlling the amount of jitter on the y-axis
(will be passed to \link{jitter}).
}
\item{x.amount}{
\samp{amount} for controlling the amount of jitter on the x-axis
(will be passed to \link{jitter}).
}
\item{pch}{
\samp{pch} values (plot symbols) taken for plotting the data. Note that
the same values are taken for raw data and means. See \link{points}
for more details. Recycled if too short (with warning). Default is 21:25,
because those are the only values that can be displayed filled and non-filled.
All other values should not be used.
}
\item{lty}{
\samp{lty} values (line types) for connecting the means. See \link{par}
for more details. Recycled if too short (with warning). Default is 1:5.
}
\item{bg.b.col}{
background border color: border color of raw data points. Silently recycled. Default:
\samp{"darkgrey"}
}
\item{bg.f.col}{
background filling color: fill color of raw data points. Silently recycled. Default:
\samp{NULL}
}
\item{fg.b.col}{
foreground border color: border color of mean data points. Silently recycled. Default:
\samp{black}
}
\item{fg.f.col}{
foreground fill color: fill color for mean data points. Silently recycled. Default:
\samp{black}
}
\item{type}{
same as type in \link{plot}. Default: \samp{o} ("overplotted")
}
\item{pt.cex}{
\samp{numeric} specifying the \samp{cex} value used for plotting the points.
Default is 1.
}
\item{lwd}{
\samp{numeric} specifying the \samp{lwd} value used for plotting the lines.
Default is 1.
}
\item{xlab}{
x-axis label. Default: \samp{""}
}
\item{ylab}{
y-axis label. Default: \samp{""}
}
\item{ylim}{
the y-axis limits of the plot. If not specified (the default), they are taken
from the data so that all raw data points are visible, and a warning message
is displayed specifying the \samp{ylim} used.
}
\item{max.offset}{
\samp{numeric}. maximal offset of factor levels from the offset factor
(\samp{col.offset}) specifying the different lines. The centre of each factor
on the x-axis is at full numbers (starting from 1 to ...). The maximum will
only be reached if the number of factor levels (from \samp{col.offset}) is
even. Default: 0.2.
}
\item{xaxis}{
\samp{logical} value indicating whether or not the x-axis should be generated
by \samp{raw.means.plot}. If \samp{TRUE}, labels for the x-axis will be taken
either from the unique values of \samp{col.x} or can be specified with
\samp{x.labels}.
}
\item{x.labels}{
\samp{character} vector specifying \samp{col.x} levels. Only relevant if
\samp{xaxis=TRUE}. Then, the values given here will be displayed at the
x-axis for each factor level of \samp{col.x}.
}
\item{xaxt}{
A character which specifies whether or not the x-axis should be plotted by
the call to the plot function. Interferes with the aforementioned \samp{xaxis}
argument and the automatic x-axis generation by \samp{raw.means.plot}.
Just there for completeness. Default \samp{"n"} (and should not be changed).
}
\item{plot}{
\samp{logical}. Should the \samp{raw.means.plot} be drawn or not. If
\samp{TRUE} (the default) the plot will be drawn. If \samp{FALSE} only the
legend will be drawn (if \samp{legend = TRUE}). See details.
}
\item{legend}{
\samp{logical} indicating whether or not \samp{raw.means.plot} should
automatically add a legend on the right outside the plot area indicating
which line and points refer to which \samp{col.offset} factor levels. Default
is \samp{TRUE}.
}
\item{mar}{
\samp{NULL} or \samp{numeric} vector of length 4 indicating the margins of
the plot (see \link{par}). If \samp{NULL} (the default) the right
margin (i.e., \samp{par("mar")[4]}) will be (imperfectly) guessed from the
\samp{col.offset} factors for placing the legend to the right of the plot. If
length is four this value will be taken. Ignored for \samp{plot = FALSE}.
}
\item{reset.mar}{
\samp{logical} indicating if the margins (\samp{mar}) shall be reset after
being set internally. Will be ignored if \samp{legend = FALSE}. Default is
\samp{TRUE} and should not be changed (especially with \samp{plot = FALSE}).
}
\item{l.pos}{
\samp{numeric} vector of length 2 indicating the position of the legend. If
not specified, it is determined automatically. See details.
}
\item{yjust}{
how the legend is to be justified relative to the legend y location. A value
of 0 means top, 0.5 means centered and 1 means bottom justified. Default is
0.5.
}
\item{l.bty}{
the type of box to be drawn around the legend. The allowed values are
\samp{"o"} and \samp{"n"} (the default).
}
\item{l.adj}{
\samp{numeric} of length 1 or 2; the string adjustment for legend text. Useful
for y-adjustment when labels are plotmath expressions. See \link{legend}
and \link{plotmath} for more info.
}
\item{\dots}{
further arguments which are either passed to plot or legend (or
\samp{raw.means.plot} for \samp{raw.means.plot2}). The following arguments
are passed to legend, all others are passed to plot:
\samp{"fill", "border", "angle", "density", "box.lwd", "box.lty", "box.col",
"pt.cex", "pt.lwd", "xjust", "x.intersp", "y.intersp", "text.width",
"text.col", "merge", "trace", "plot", "ncol", "horiz", "title", "inset",
"title.col", "title.adj"}
}
\item{fun.aggregate}{
Function or function name used for aggregating the data across the two
factors. Default is \samp{"mean"}. (only for \samp{raw.means.plot2})
}
}
\details{
\samp{raw.means.plot2} is probably the more useful function, as it allows for
using a data.frame with more than two factors and aggregates across the other
factors, but needs a column specifying the experimental unit (e.g.,
participant).
\samp{raw.means.plot} is basically an advanced wrapper for two other
functions: \link{plot} and (if \samp{legend=TRUE})
\link{legend}. Furthermore, raw data is plotted with a call to
\link{points} and the means with a call to \link{lines}.
You can use \samp{raw.means.plot} to plot only a legend by setting
\samp{plot = FALSE} and \samp{legend = TRUE}. Then, \samp{raw.means.plot}
will draw an invisible plot with \samp{xlim = c(0,10)} and
\samp{ylim = c(0, 10)} and place the legend on this invisible plot. You
can specify \samp{l.pos} to position the legend, otherwise it will be plotted
at \samp{c(5,5)} (i.e., in the middle of the plot). Note that
\samp{xpd = TRUE} in the call to \samp{legend} (see \link{par}).
}
\value{
Nothing. This function is invoked for its side effects.
}
\author{
Henrik Singmann (\email{henrik.singmann@psychologie.uni-freiburg.de}) with
ideas from Jim Lemon
}
\seealso{
\link{add.ps} can be used in addition to \samp{raw.means.plot} to
compare the factors at each x-axis position, by adding p-values from t-tests
to the x-axis.
}
\examples{
x <- data.frame(id = 1:150, offset = rep(c("Group A", "Group B", "Group C"),
each = 50), xaxis = sample(c("A", "B", "C", "D"),150, replace = TRUE),
data = c(rnorm(50, 10, 5), rnorm(50, 15,6), rnorm(50, 20, 5)))
raw.means.plot(x)
raw.means.plot(x, main = "Example", ylab = "Values", xlab = "Factor",
title = "Groups")
raw.means.plot(x, "offset", "xaxis", "data")
raw.means.plot(x, "xaxis", "offset", "data")
raw.means.plot(x, 3, 2, 4)
# different colors:
raw.means.plot(x, main = "Example", ylab = "Values", xlab = "Factor",
title = "Groups", fg.f.col = c("red","blue", "green"))
x2 <- data.frame(id = 1:150, offset = rep(c("Group A", "Group B", "Group C"),
each = 50), xaxis = sample(c("A", "B", "C", "D"),150, replace = TRUE),
data = c(rnorm(50, 10, 5), rnorm(50, 15,6), rnorm(50, 20, 5)))
layout(matrix(c(1,2,3,3), 2,2,byrow = TRUE), heights = c(7,1))
raw.means.plot(x, main = "Data x1", ylab = "Values", xlab = "Factor",
legend = FALSE, mar = c(4,4,4,1)+0.1)
raw.means.plot(x2, main = "Data x2", ylab = "Values", xlab = "Factor",
legend = FALSE, mar = c(4,4,4,1)+0.1)
raw.means.plot(x2, plot = FALSE, title = "Groups")
y <- data.frame(id = 1:300, offset = rep(1, 300),
axis = sample(LETTERS[1:6],300, replace = TRUE), data = c(rnorm(100, 1),
rnorm(100), rnorm(100,1)))
par(mfrow = c(2,2))
raw.means.plot(y, legend = FALSE)
raw.means.plot(y, type = "p", legend = FALSE)
raw.means.plot(y, type = "l", legend = FALSE)
raw.means.plot(y, 3, 2, x.labels = "one group only")
# Example with overlapping points
z <- data.frame (id = 1:200, offset = rep(c("C 1", "C 2"), 200),
axis = sample(LETTERS[1:4], 200, replace = TRUE),
data = sample(1:20, 200, replace = TRUE))
# x versus y jitter
par(mfrow = c(2,2))
raw.means.plot(z, avoid.overlap = "none", main = "no-jitter")
raw.means.plot(z, main = "y-axis jitter (default)")
raw.means.plot(z, avoid.overlap = "x", main = "x-axis jitter")
raw.means.plot(z, avoid.overlap = "both", main = "both-axis jitter")
# y-axis jitter (default)
par(mfrow = c(2,2))
raw.means.plot(z, avoid.overlap = "none", main = "no jitter")
raw.means.plot(z, y.factor = 0.5, main = "smaller y-jitter")
raw.means.plot(z, main = "standard y-jitter")
raw.means.plot(z, y.factor = 2, main = "bigger y-jitter")
# x-axis jitter
par(mfrow = c(2,2))
raw.means.plot(z, avoid.overlap = "none", main = "no jitter")
raw.means.plot(z, avoid.overlap = "x", x.amount = 0.025,
main = "smaller x -jitter")
raw.means.plot(z, avoid.overlap = "x", main = "standard x-jitter")
raw.means.plot(z, avoid.overlap = "x", x.amount= 0.1,
main = "bigger x-jitter")
\dontrun{
# The following examples use the OBrienKaiser dataset from car and need reshape.
require(reshape)
require(car)
data(OBrienKaiser)
OBKnew <- cbind(factor(1:nrow(OBrienKaiser)), OBrienKaiser)
colnames(OBKnew)[1] <- "id"
OBK.long <- melt(OBKnew)
OBK.long[, c("measurement", "time")] <-
t(vapply(strsplit(as.character(OBK.long$variable), "\\\."), "[", c("", "")))
raw.means.plot2(OBK.long, "id", "measurement", "gender", "value")
raw.means.plot2(OBK.long, "id", "treatment", "gender", "value")
# also use add.ps:
# For this example the comparisons at each x-axis position are within-subject!
raw.means.plot2(OBK.long, "id", "measurement", "gender", "value")
add.ps(OBK.long, "id", "measurement", "gender", "value", paired = TRUE)
#reference is "fup"
raw.means.plot2(OBK.long, "id", "measurement", "gender", "value")
add.ps(OBK.long, "id", "measurement", "gender", "value", ref.offset = 2,
paired = TRUE) #reference is "post"
# Use R's default t-test (i.e., Welch test)
raw.means.plot2(OBK.long, "id", "treatment", "gender", "value")
add.ps(OBK.long, "id", "treatment", "gender", "value",
prefixes = c("p(control vs. A)", "p(control vs. B)"))
# Use standard t-test
raw.means.plot2(OBK.long, "id", "treatment", "gender", "value")
add.ps(OBK.long, "id", "treatment", "gender", "value", var.equal = TRUE,
prefixes = c("p(control vs. A)", "p(control vs. B)"))
}
}
|