% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/aggr.R
\name{aggr}
\alias{aggr}
\alias{plot.aggr}
\alias{print.aggr}
\alias{summary.aggr}
\alias{print.summary.aggr}
\title{Aggregations for missing/imputed values}
\usage{
aggr(x, delimiter = NULL, plot = TRUE, ...)
\method{plot}{aggr}(
x,
col = c("skyblue", "red", "orange"),
bars = TRUE,
numbers = FALSE,
prop = TRUE,
combined = FALSE,
varheight = FALSE,
only.miss = FALSE,
border = par("fg"),
sortVars = FALSE,
sortCombs = TRUE,
ylabs = NULL,
axes = TRUE,
labels = axes,
cex.lab = 1.2,
cex.axis = par("cex"),
cex.numbers = par("cex"),
gap = 4,
...
)
\method{print}{aggr}(x, ..., digits = NULL)
\method{summary}{aggr}(object, ...)
\method{print}{summary.aggr}(x, ...)
}
\arguments{
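\item{x}{for \code{aggr}, a vector, matrix or \code{data.frame} containing
the data. For the \code{plot} and \code{print} methods for class
\code{"aggr"}, an object of class \code{"aggr"}. For
\code{print.summary.aggr}, an object of class \code{"summary.aggr"}.}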
\item{delimiter}{a character vector to distinguish between variables and
imputation indices for imputed variables (therefore, \code{x} needs to have
\code{\link[=colnames]{colnames()}}). If given, it is used to determine the corresponding
imputation index for any imputed variable (a logical vector indicating which
values of the variable have been imputed). If such imputation indices are
found, they are used for highlighting and the colors are adjusted according
to the given colors for imputed variables (see \code{col}).}
\item{plot}{a logical indicating whether the results should be plotted (the
default is \code{TRUE}).}
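\item{\dots}{for \code{aggr} and the \code{plot} method, further arguments
and graphical parameters to be passed down (e.g. \code{oma}, see
\dQuote{Details}; the deprecated arguments \code{labs} and \code{names.arg}
are also accepted here, see \dQuote{Note}). For the \code{print} and
\code{summary} methods, further arguments, currently ignored.}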
\item{col}{a vector of length three giving the colors to be used for
observed, missing and imputed data. If only one color is supplied, it is
used for missing and imputed data and observed data is transparent. If only
two colors are supplied, the first one is used for observed data and the
second color is used for missing and imputed data.}
\item{bars}{a logical indicating whether a small barplot for the frequencies
of the different combinations should be drawn.}
\item{numbers}{a logical indicating whether the proportion or frequencies of
the different combinations should be represented by numbers.}
\item{prop}{a logical indicating whether the proportion of missing/imputed
values and combinations should be used rather than the total amount.}
\item{combined}{a logical indicating whether the two plots should be
combined. If \code{FALSE}, a separate barplot on the left hand side shows
the amount of missing/imputed values in each variable. If \code{TRUE}, a
small version of this barplot is drawn on top of the plot for the
combinations of missing/imputed and non-missing values. See
\dQuote{Details} for more information.}
\item{varheight}{a logical indicating whether the cell heights are given by
the frequencies of occurrence of the corresponding combinations.}
\item{only.miss}{a logical indicating whether the small barplot for the
frequencies of the combinations should only be drawn for combinations
including missing/imputed values (if \code{bars} is \code{TRUE}). This is
useful if most observations are complete, in which case the corresponding
bar would dominate the barplot such that the remaining bars are too
compressed. The proportion or frequency of complete observations (as
determined by \code{prop}) is then represented by a number instead of a bar.}
\item{border}{the color to be used for the border of the bars and
rectangles. Use \code{border=NA} to omit borders.}
\item{sortVars}{a logical indicating whether the variables should be sorted
by the number of missing/imputed values.}
\item{sortCombs}{a logical indicating whether the combinations should be
sorted by the frequency of occurrence.}
\item{ylabs}{if \code{combined} is \code{TRUE}, a character string giving
the y-axis label of the combined plot, otherwise a character vector of
length two giving the y-axis labels for the two plots.}
\item{axes}{a logical indicating whether axes should be drawn.}
\item{labels}{either a logical indicating whether labels should be plotted
on the x-axis, or a character vector giving the labels.}
\item{cex.lab}{the character expansion factor to be used for the axis
labels.}
\item{cex.axis}{the character expansion factor to be used for the axis
annotation.}
\item{cex.numbers}{the character expansion factor to be used for the
proportion or frequencies of the different combinations.}
\item{gap}{if \code{combined} is \code{FALSE}, a numeric value giving the
distance between the two plots in margin lines.}
\item{digits}{the minimum number of significant digits to be used (see
\code{\link[=print.default]{print.default()}}).}
\item{object}{an object of class \code{"aggr"}.}
}
\value{
for \code{aggr}, a list of class \code{"aggr"} containing the
following components:
\itemize{
\item x the data used.
\item combinations a character vector representing the combinations of
variables.
\item count the frequencies of these combinations.
\item percent the percentage of these combinations.
\item missings a \code{data.frame} containing the amount of
missing/imputed values in each variable.
\item tabcomb the indicator matrix for the combinations of variables.
}
for \code{summary.aggr}, a list of class \code{"summary.aggr"} containing
the following components:
\itemize{
\item missings a \code{data.frame} containing the amount of missing or
imputed values in each variable.
\item combinations a \code{data.frame} containing a character vector
representing the combinations of variables along with their frequencies and
percentages.
}
}
\description{
Calculate or plot the amount of missing/imputed values in each variable and
the amount of missing/imputed values in certain combinations of variables.
Print method for objects of class \code{"aggr"}.
Summary method for objects of class \code{"aggr"}.
Print method for objects of class \code{"summary.aggr"}.
}
\details{
Often it is of interest how many missing/imputed values are contained in
each variable. Even more interesting, there may be certain combinations of
variables with a high number of missing/imputed values.
If \code{combined} is \code{FALSE}, two separate plots are drawn for the
missing/imputed values in each variable and the combinations of
missing/imputed and non-missing values. The barplot on the left hand side
shows the amount of missing/imputed values in each variable. In the
\emph{aggregation plot} on the right hand side, all existing combinations of
missing/imputed and non-missing values in the observations are visualized.
Available, missing and imputed data are color coded as given by \code{col}.
Additionally, there are two possibilities to represent the frequencies of
occurrence of the different combinations. The first option is to visualize
the proportions or frequencies by a small bar plot and/or numbers. The
second option is to let the cell heights be given by the frequencies of the
corresponding combinations. Furthermore, variables may be sorted by the
number of missing/imputed values and combinations by the frequency of
occurrence, which makes it easier to detect the structure of the
missing/imputed values.
If \code{combined} is \code{TRUE}, a small version of the barplot showing
the amount of missing/imputed values in each variable is drawn on top of the
aggregation plot.
The graphical parameter \code{oma} will be set unless supplied as an
argument.
}
\note{
Some of the argument names and positions have changed with version 1.3
due to extended functionality and for more consistency with other plot
functions in \code{VIM}. For backward compatibility, the arguments \code{labs}
and \code{names.arg} can still be supplied to \code{\dots{}} and are handled
correctly. Nevertheless, they are deprecated and no longer documented. Use
\code{ylabs} and \code{labels} instead.
}
\examples{
data(sleep, package="VIM")
## for missing values
a <- aggr(sleep)
a
summary(a)
## for imputed values
sleep_IMPUTED <- kNN(sleep)
a <- aggr(sleep_IMPUTED, delimiter="_imp")
a
summary(a)
data(sleep, package = "VIM")
a <- aggr(sleep, plot=FALSE)
a
data(sleep, package = "VIM")
summary(aggr(sleep, plot=FALSE))
data(sleep, package = "VIM")
s <- summary(aggr(sleep, plot=FALSE))
s
}
\references{
M. Templ, A. Alfons, P. Filzmoser (2012) Exploring incomplete
data using visualization tools. \emph{Journal of Advances in Data Analysis
and Classification}, Online first. \doi{10.1007/s11634-011-0102-y}.
}
\seealso{
\code{\link[=aggr]{aggr()}}, \code{\link[=print.aggr]{print.aggr()}},
\code{\link[=summary.aggr]{summary.aggr()}},
\code{\link[=print.summary.aggr]{print.summary.aggr()}}
Other plotting functions:
\code{\link{barMiss}()},
\code{\link{histMiss}()},
\code{\link{marginmatrix}()},
\code{\link{marginplot}()},
\code{\link{matrixplot}()},
\code{\link{mosaicMiss}()},
\code{\link{pairsVIM}()},
\code{\link{parcoordMiss}()},
\code{\link{pbox}()},
\code{\link{scattJitt}()},
\code{\link{scattMiss}()},
\code{\link{scattmatrixMiss}()},
\code{\link{spineMiss}()}
}
\author{
Andreas Alfons, Matthias Templ, modifications for displaying imputed
values by Bernd Prantner
Matthias Templ, modifications by Andreas Alfons and Bernd Prantner
Matthias Templ, modifications by Andreas Alfons
Andreas Alfons, modifications by Bernd Prantner
}
\concept{plotting functions}
\keyword{hplot}
\keyword{print}