1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338
|
# ---------------------------------------
# Author: Andreas Alfons, Bernd Prantner
# and Daniel Schopfhauser
# Vienna University of Technology
# ---------------------------------------
#' Colored map with information about missing/imputed values
#'
#' Colored map in which the proportion or amount of missing/imputed values in
#' each region is coded according to a continuous or discrete color scheme.
#' The sequential color palette may thereby be computed in the *HCL* or
#' the *RGB* color space.
#'
#' The proportion or amount of missing/imputed values in `x` of each
#' region is coded according to a continuous or discrete color scheme in the
#' color range defined by `col`. In addition, the proportions or numbers
#' can be shown as labels in the regions.
#'
#' If `interactive` is `TRUE`, clicking in a region displays more
#' detailed information about missing/imputed values on the console. Clicking
#' outside the borders quits the interactive session.
#'
#' @rdname colormapMiss
#' @aliases colormapMiss colormapMissLegend
#' @param x a numeric vector.
#' @param region a vector or factor of the same length as `x` giving the
#' regions.
#' @param map an object of any class that contains polygons and provides its
#' own plot method (e.g., `"SpatialPolygons"` from package `sp`).
#' @param imp_index a logical-vector indicating which values of \sQuote{x} have
#' been imputed. If given, it is used for highlighting and the colors are
#' adjusted according to the given colors for imputed variables (see
#' `col`).
#' @param prop a logical indicating whether the proportion of missing/imputed
#' values should be used rather than the total amount.
#' @param polysRegion a numeric vector specifying the region that each polygon
#' belongs to.
#' @param range a numeric vector of length two specifying the range (minimum
#' and maximum) of the proportion or amount of missing/imputed values to be
#' used for the color scheme.
#' @param n for `colormapMiss`, the number of equally spaced cut-off
#' points for a discretized color scheme. If this is not a positive integer, a
#' continuous color scheme is used (the default). In the latter case, the
#' number of rectangles to be drawn in the legend can be specified in
#' `colormapMissLegend`. A reasonably large number makes it appear
#' continuously.
#' @param col the color range (start end end) to be used. RGB colors may be
#' specified as character strings or as objects of class
#' "[colorspace::RGB()]". HCL colors need to be specified as objects
#' of class "[colorspace::polarLUV()]". If only one color is
#' supplied, it is used as end color, while the start color is taken to be
#' transparent for RGB or white for HCL.
#' @param gamma numeric; the display *gamma* value (see
#' [colorspace::hex()]).
#' @param fixup a logical indicating whether the colors should be corrected to
#' valid RGB values (see [colorspace::hex()]).
#' @param coords a matrix or `data.frame` with two columns giving the
#' coordinates for the labels.
#' @param numbers a logical indicating whether the corresponding proportions or
#' numbers of missing/imputed values should be used as labels for the regions.
#' @param digits the number of digits to be used in the labels (in case of
#' proportions).
#' @param cex.numbers the character expansion factor to be used for the labels.
#' @param col.numbers the color to be used for the labels.
#' @param legend a logical indicating whether a legend should be plotted.
#' @param interactive a logical indicating whether more detailed information
#' about missing/imputed values should be displayed interactively (see
#' \sQuote{Details}).
#' @param xleft left *x* position of the legend.
#' @param ybottom bottom *y* position of the legend.
#' @param xright right *x* position of the legend.
#' @param ytop top *y* position of the legend.
#' @param cmap a list as returned by `colormapMiss` that contains the
#' required information for the legend.
#' @param horizontal a logical indicating whether the legend should be drawn
#' horizontally or vertically.
#' @param \dots further arguments to be passed to `plot`.
#' @return `colormapMiss` returns a list with the following components:
#' - nmiss a numeric vector containing the number of missing/imputed
#' values in each region.
#' - nobs a numeric vector containing the number of observations in
#' each region.
#' - pmiss a numeric vector containing the proportion of missing
#' values in each region.
#' - prop a logical indicating whether the proportion of
#' missing/imputed values have been used rather than the total amount.
#' - range the range of the proportion or amount of missing/imputed
#' values corresponding to the color range.
#' - n either a positive integer giving the number of equally spaced
#' cut-off points for a discretized color scheme, or `NULL` for a
#' continuous color scheme.
#' - start the start color of the color scheme.
#' - end the end color of the color scheme.
#' - space a character string giving the color space (either
#' `"rgb"` for RGB colors or `"hcl"` for HCL colors).
#' - gamma numeric; the display *gamma* value (see
#' [colorspace::hex()]).
#' - fixup a logical indicating whether the colors have been
#' corrected to valid RGB values (see [colorspace::hex()]).
#' @note Some of the argument names and positions have changed with versions
#' 1.3 and 1.4 due to extended functionality and for more consistency with
#' other plot functions in `VIM`. For back compatibility, the arguments
#' `cex.text` and `col.text` can still be supplied to \code{\dots{}}
#' and are handled correctly. Nevertheless, they are deprecated and no longer
#' documented. Use `cex.numbers` and `col.numbers` instead.
#' @author Andreas Alfons, modifications to show imputed values by Bernd
#' Prantner
#' @seealso [colSequence()], [growdotMiss()],
#' [mapMiss()]
#' @references M. Templ, A. Alfons, P. Filzmoser (2012) Exploring incomplete
#' data using visualization tools. *Journal of Advances in Data Analysis
#' and Classification*, Online first. DOI: 10.1007/s11634-011-0102-y.
#' @keywords hplot
#' @export
colormapMiss <- function(x, region, map, imp_index = NULL,
prop = TRUE, polysRegion = 1:length(x), range = NULL,
n = NULL, col = c("red","orange"),
gamma = 2.2, fixup = TRUE, coords = NULL,
numbers = TRUE, digits = 2, cex.numbers = 0.8,
col.numbers = par("fg"), legend = TRUE,
interactive = TRUE, ...) {
check_data(x)
x <- as.data.frame(x)
# back compatibility
dots <- list(...)
if(missing(cex.numbers) && "cex.text" %in% names(dots)) {
cex.numbers <- dots$cex.text
}
if(missing(col.numbers) && "col.text" %in% names(dots)) {
col.numbers <- dots$col.text
}
# initializations
imputed <- FALSE
if(!is.null(imp_index)) {
if(any(is.na(x))) {
imputed <- FALSE
warning("'imp_index' is given, but there are missing values in 'x'! 'imp_index' will be ignored.", call. = FALSE)
} else {
if(is.numeric(imp_index) && range(imp_index) == c(0,1)) imp_index <- as.logical(imp_index)
else if(!is.logical(imp_index)) stop("The missing-index of the imputed Variable must be of the type logical")
imputed <- TRUE
}
}
x <- as.vector(x)
region <- as.factor(region)
if(!is.null(coords)) { # error messages
if(!(inherits(coords, c("data.frame","matrix"))))
stop("'coords' must be a data.frame or matrix")
if(ncol(coords) != 2) stop("'coords' must be 2-dimensional")
}
if(is.character(map)) map <- get(map, envir=.GlobalEnv)
prop <- isTRUE(prop)
# check colors
if(!is(col, "RGB") && !is(col, "polarLUV") &&
(!is.character(col) || length(col) == 0 || col == c("red","orange"))) {
if(!imputed) col <- "red"
else col <- "orange"
}
if(is.character(col)) {
# colors given as character string
if(length(col) == 1) {
start <- par("bg")
end <- col
} else {
start <- col[1]
end <- col[2]
}
space <- "rgb"
} else {
space <- if(is(col, "RGB")) "rgb" else "hcl"
if(nrow(coords(col)) == 1) {
if(is(col, "RGB")) {
# RGB colors
start <- par("bg")
} else {
# HCL colors
start <- polarLUV(0, 0, col@coords[1, "H"])
}
end <- col
} else {
start <- col[1,]
end <- col[2,]
}
}
# compute number and proportions of missing values
if(!imputed) nmiss <- tapply(x, list(region), countNA)
else {
getImp <- function(x) length(which(x))
nmiss <- tapply(unlist(imp_index), list(region), getImp)
}
nobs <- tapply(x, list(region), length)
pmiss <- 100*nmiss/nobs
# check breakpoints
if(is.null(range)) {
range <- c(0, if(prop) ceiling(max(pmiss)) else max(nmiss))
} else {
# TODO: check 'range'
}
# get colors for regions
n <- rep(n, length.out=1)
if(isTRUE(n > 1)) {
# equally spaced categories
breaks <- seq(range[1], range[2], length=n+1)
cat <- cut(if(prop) pmiss else nmiss, breaks,
labels=FALSE, include.lowest=TRUE)
pcol <- seq(0, 1, length=n)
cols <- colSequence(pcol, start, end, space, gamma=gamma, fixup=fixup)
cols <- cols[cat]
} else {
# continuous color scheme
n <- NULL
pcol <- if(prop) pmiss else nmiss
pcol <- (pcol - range[1])/diff(range)
cols <- colSequence(pcol, start, end, space, gamma=gamma, fixup=fixup)
}
cols <- cols[polysRegion]
localPlot <- function(..., cex.text, col.text) plot(...)
localPlot(map, col=cols, ...)
if(isTRUE(numbers)) {
# number or percentage of missings as labels for regions
if(is.null(coords)) coords <- coordinates(map)
labs <- if(prop) paste(round(pmiss, digits), "%", sep="") else nmiss
plabs <- labs[polysRegion]
plabs[duplicated(polysRegion)] <- ""
text(coords, labels=plabs, cex=cex.numbers, col=col.numbers)
}
# useful statistics for legend
cmap <- list(nmiss=nmiss, nobs=nobs, pmiss=pmiss, prop=prop, range=range,
n=n, start=start, end=end, space=space, gamma=gamma, fixup=fixup)
if(isTRUE(legend)) {
usr <- par("usr")
xrange <- usr[1:2]
xdiff <- usr[2] - usr[1]
yrange <- usr[3:4]
ydiff <- usr[4] - usr[3]
length <- 1/3
height <- 0.1*length
xleft <- xrange[1] + 0.02*xdiff
xright <- xleft + length*xdiff
ytop <- yrange[2] - 0.02*ydiff
ybottom <- ytop - height*ydiff
colormapMissLegend(xleft, ybottom, xright, ytop,
cmap, cex.numbers=cex.numbers, col.numbers=col.numbers)
}
if(isTRUE(interactive)) {
cat("Click on a region to get more information about missings.\n")
cat("To regain use of the R console, click outside the borders.\n")
p <- locatorVIM()
while(!is.null(p)) {
p <- SpatialPoints(matrix(unlist(p), ncol=2))
poly <- over(p, map)
ind <- polysRegion[poly]
if(!is.na(ind)) {
if(!imputed) label <- "missings"
else label <- "imputed missings"
cat(paste("\n ", levels(region)[ind], ":", sep=""))
cat(paste("\n Number of ", label, ": ", nmiss[ind]))
cat(paste("\n Number of observations:", nobs[ind]))
cat(paste("\n Proportion of ", label, ": ",
round(pmiss[ind], digits), "%\n", sep=""))
p <- locatorVIM()
} else p <- NULL
}
}
# return statistics invisibly
invisible(cmap)
}
## legend
#' @export colormapMissLegend
#' @rdname colormapMiss
colormapMissLegend <- function(xleft, ybottom, xright, ytop, cmap,
# range, prop = FALSE, col = "red",
n = 1000, horizontal = TRUE, digits = 2,
cex.numbers = 0.8, col.numbers = par("fg"),
...) {
# back compatibility
dots <- list(...)
dn <- names(dots)
if(missing(cmap)) {
if("range" %in% dn) range <- dots$range
else stop("argument 'range' is missing, with no default")
prop <- if("prop" %in% dn) dots$prop else FALSE
col <- if("col" %in% dn) dots$col else "red"
cmap <- list(prop=prop, range=range, n=NULL, start=par("bg"),
end=col, space="rgb", gamma=2.4, fixup=TRUE)
}
if(missing(cex.numbers) && "cex.text" %in% dn) cex.numbers <- dots$cex.text
if(missing(col.numbers) && "col.text" %in% dn) col.numbers <- dots$col.text
# initializations
prop <- isTRUE(cmap$prop)
range <- cmap$range
cont <- is.null(cmap$n) # is legend for continuous color scheme?
n <- if(cont) n else cmap$n
n <- rep(n, length.out=1)
# allow to plot legend outside plot region
op <- par(xpd=TRUE)
on.exit(par(op))
# compute steps for legend
length <- xright - xleft
height <- ytop - ybottom
# compute colors for legend
col <- colSequence(seq(0, 1, length=n), cmap$start, cmap$end,
cmap$space, gamma=cmap$gamma, fixup=cmap$fixup)
# compute grid and position of legend
grid <- seq(0, 1, length=n+1)
if(cont) {
pos <- 0:1
ann <- range
} else {
pos <- grid
ann <- seq(range[1], range[2], length=n+1)
}
ann <- if(prop) paste(format(ann, digits), "%", sep="") else ann
# plot legend
# TODO: check space for labels
if(horizontal) {
grid <- grid*length + xleft
if(cont) {
rect(grid[-(n+1)], ybottom, grid[-1], ytop, col=col, border=NA)
rect(xleft, ybottom, xright, ytop, border=NULL)
} else rect(grid[-(n+1)], ybottom, grid[-1], ytop, col=col, border=NULL)
pos <- pos*length + xleft
text(pos, ybottom-0.25*height, labels=ann,
adj=c(0.5,1), cex=cex.numbers, col=col.numbers)
} else {
grid <- grid*height + ybottom
if(cont) {
rect(xleft, grid[-(n+1)], xright, grid[-1], col=col, border=NA)
rect(xleft, ybottom, xright, ytop, border=NULL)
} else rect(xleft, grid[-(n+1)], xright, grid[-1], col=col, border=NULL)
pos <- pos*height + ybottom
text(xright+0.25*length, pos, labels=ann,
adj=c(0,0.5), cex=cex.numbers, col=col.numbers)
}
invisible()
}
|