File: parcoordMiss.R

package info (click to toggle)
r-cran-vim 6.2.2%2Bdfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, forky, sid, trixie
  • size: 1,556 kB
  • sloc: cpp: 141; sh: 12; makefile: 2
file content (362 lines) | stat: -rw-r--r-- 17,496 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
# ----------------------------------------------------------
# Authors: Andreas Alfons, Bernd Prantner, Matthias Templ
#          and Daniel Schopfhauser
#          Vienna University of Technology
# ----------------------------------------------------------



#' Parallel coordinate plot with information about missing/imputed values
#' 
#' Parallel coordinate plot with adjustments for missing/imputed values.
#' Missing values in the plotted variables may be represented by a point above
#' the corresponding coordinate axis to prevent disconnected lines. In
#' addition, observations with missing/imputed values in selected variables may
#' be highlighted.
#' 
#' In parallel coordinate plots, the variables are represented by parallel
#' axes.  Each observation of the scaled data is shown as a line.  Observations
#' with missing/imputed values in selected variables may thereby be
#' highlighted.  However, plotting variables with missing values results in
#' disconnected lines, making it impossible to trace the respective
#' observations across the graph.  As a remedy, missing values may be
#' represented by a point above the corresponding coordinate axis, which is
#' separated from the main plot by a small gap and a horizontal line, as
#' determined by `plotNA`.  Connected lines can then be drawn for all
#' observations.  Nevertheless, a caveat of this display is that it may draw
#' attention away from the main relationships between the variables.
#' 
#' If `interactive` is `TRUE`, it is possible switch between this
#' display and the standard display without the separate level for missing
#' values by clicking in the top margin of the plot. In addition, the variables
#' to be used for highlighting can be selected interactively.  Observations
#' with missing/imputed values in any or in all of the selected variables are
#' highlighted (as determined by `selection`).  A variable can be added to
#' the selection by clicking on a coordinate axis.  If a variable is already
#' selected, clicking on its coordinate axis removes it from the selection.
#' Clicking anywhere outside the plot region (except the top margin, if
#' missing/imputed values exist) quits the interactive session.
#' 
#' @param x a matrix or `data.frame`.
#' @param delimiter a character-vector to distinguish between variables and
#' imputation-indices for imputed variables (therefore, `x` needs to have
#' [colnames()]). If given, it is used to determine the corresponding
#' imputation-index for any imputed variable (a logical-vector indicating which
#' values of the variable have been imputed). If such imputation-indices are
#' found, they are used for highlighting and the colors are adjusted according
#' to the given colors for imputed variables (see `col`).
#' @param highlight a vector giving the variables to be used for highlighting.
#' If `NULL` (the default), all variables are used for highlighting.
#' @param selection the selection method for highlighting missing/imputed
#' values in multiple highlight variables.  Possible values are `"any"`
#' (highlighting of missing/imputed values in *any* of the highlight
#' variables) and `"all"` (highlighting of missing/imputed values in
#' *all* of the highlight variables).
#' @param plotvars a vector giving the variables to be plotted.  If `NULL`
#' (the default), all variables are plotted.
#' @param col if `plotNA` is `TRUE`, a vector of length six giving
#' the colors to be used for observations with different combinations of
#' observed and missing/imputed values in the plot variables and highlight
#' variables (vectors of length one or two are recycled).  Otherwise, a vector
#' of length two giving the colors for non-highlighted and highlighted
#' observations (if a single color is supplied, it is used for both).
#' @param plotNA a logical indicating whether missing values in the plot
#' variables should be represented by a point above the corresponding
#' coordinate axis to prevent disconnected lines.
#' @param alpha a numeric value between 0 and 1 giving the level of
#' transparency of the colors, or `NULL`.  This can be used to prevent
#' overplotting.
#' @param lty if `plotNA` is `TRUE`, a vector of length four giving
#' the line types to be used for observations with different combinations of
#' observed and missing/imputed values in the plot variables and highlight
#' variables (vectors of length one or two are recycled).  Otherwise, a vector
#' of length two giving the line types for non-highlighted and highlighted
#' observations (if a single line type is supplied, it is used for both).
#' @param xlim,ylim axis limits.
#' @param main,sub main and sub title.
#' @param xlab,ylab axis labels.
#' @param labels either a logical indicating whether labels should be plotted
#' below each coordinate axis, or a character vector giving the labels.
#' @param xpd a logical indicating whether the lines should be allowed to go
#' outside the plot region.  If `NULL`, it defaults to `TRUE` unless
#' axis limits are specified.
#' @param interactive a logical indicating whether interactive features should
#' be enabled (see \sQuote{Details}).
#' @param \dots for `parcoordMiss`, further graphical parameters to be
#' passed down (see [graphics::par()]).  For `TKRparcoordMiss`,
#' further arguments to be passed to `parcoordMiss`.
#' @note Some of the argument names and positions have changed with versions
#' 1.3 and 1.4 due to extended functionality and for more consistency with
#' other plot functions in `VIM`.  For back compatibility, the arguments
#' `colcomb` and `xaxlabels` can still be supplied to \code{\dots{}}
#' and are handled correctly.  Nevertheless, they are deprecated and no longer
#' documented.  Use `highlight` and `labels` instead.
#' @author Andreas Alfons, Matthias Templ, modifications by Bernd Prantner
#' @seealso [pbox()]
#' @references Wegman, E. J. (1990) Hyperdimensional data analysis using
#' parallel coordinates. *Journal of the American Statistical Association*
#' **85 (411)**, 664--675.
#' 
#' M. Templ, A. Alfons, P. Filzmoser (2012) Exploring incomplete data using
#' visualization tools.  *Journal of Advances in Data Analysis and
#' Classification*, Online first. DOI: 10.1007/s11634-011-0102-y.
#' @keywords hplot
#' @family plotting functions
#' @examples
#' 
#' data(chorizonDL, package = "VIM")
#' ## for missing values
#' parcoordMiss(chorizonDL[,c(15,101:110)], 
#'     plotvars=2:11, interactive = FALSE)
#' legend("top", col = c("skyblue", "red"), lwd = c(1,1), 
#'     legend = c("observed in Bi", "missing in Bi"))
#' 
#' ## for imputed values
#' parcoordMiss(kNN(chorizonDL[,c(15,101:110)]), delimiter = "_imp" ,
#'     plotvars=2:11, interactive = FALSE)
#' legend("top", col = c("skyblue", "orange"), lwd = c(1,1), 
#'     legend = c("observed in Bi", "imputed in Bi"))
#' 
#' @export
parcoordMiss <- function(x, delimiter = NULL, highlight = NULL, selection = c("any","all"), 
                         plotvars = NULL, plotNA = TRUE, 
                         col = c("skyblue","red","skyblue4","red4","orange","orange4"), 
                         alpha = NULL, lty = par("lty"), xlim = NULL, 
                         ylim = NULL, main = NULL, sub = NULL, 
                         xlab = NULL, ylab = NULL, labels = TRUE, 
                         xpd = NULL, interactive = TRUE, ...) {
  check_data(x)
  x <- as.data.frame(x)
    # initializations and error messages
	imputed <- FALSE # indicates if there are Variables with missing-index
	## delimiter ##
	if(!is.null(delimiter)) {
		tmp <- grep(delimiter, colnames(x)) # Position of the missing-index
		if(length(tmp) > 0) {
			imp_var <- x[, tmp, drop=FALSE]
			x <- x[, -tmp, drop=FALSE]
			
			if(ncol(x) == 0) stop("Only the missing-index is given")
			if(is.matrix(imp_var) && range(imp_var) == c(0,1)) imp_var <- apply(imp_var,2,as.logical)
			
			if(is.null(dim(imp_var))) {
				if(!is.logical(imp_var)) stop("The missing-index of imputed Variables must be of the type logical")
			} else {
				if(!any(as.logical(lapply(imp_var,is.logical)))) stop("The missing-index of imputed Variables must be of the type logical")	
			}
			imputed <- TRUE
		} else {
			warning("'delimiter' is given, but no missing-index-Variable is found", call. = FALSE)
		}
	}
    px <- ncol(x)
    if(is.null(colnames(x))) colnames(x) <- defaultNames(px)
    if(length(highlight) > ncol(x)) stop("'highlight' is too long")
    if(length(plotvars) > ncol(x)) stop("'plotvars' is too long")
    z <- if(is.null(plotvars)) x else x[, plotvars, drop=FALSE]
    pz <- ncol(z)
    if(pz < 2) stop("the data to be plotted must be at least 2-dimensional")
    selection <- match.arg(selection)
    plotNA <- isTRUE(plotNA)
    if(length(col) == 0) col <- c("skyblue","red","skyblue4","red4","orange","orange4")
    if(length(lty) == 0) lty <- par("lty")
    if(length(col) == 1 && length(lty) == 1) {
        stop("same color and line type for observed and missing values")
    }
    if(length(col) == 1) col <- rep.int(col, 6)
    else if(length(col) == 3 || length(col) == 5) col <- rep.int(col[1:2], 3)
    else if(length(col) != 6) col <- rep(col, length.out=6)
    if(length(lty) == 1) lty <- rep.int(lty, 4)
    else if(length(lty) == 3) lty <- rep.int(lty[1:2], 2)
    else if(length(lty) != 4) lty <- rep(lty, length.out=4)
    # semitransparent colors
    if(!is.null(alpha)) col <- alphablend(col, alpha)  
    # prepare data
    if(is.data.frame(z)) z <- data.matrix(z)
    else if(mode(x) != "numeric") mode(x) <- "numeric"
    if(!imputed) missz <- isNA(z, selection="any")
	else missz <- isImp(z, pos = NULL, delimiter = delimiter, imp_var = imp_var, selection = selection)[["missh"]]
    haveNA <- any(missz)
    # default axis limits
    if(is.null(xpd)) xpd <- is.null(xlim) && is.null(ylim)
    if(is.null(xlim)) xlim <- c(1, pz)
    setYlim <- is.null(ylim)
    # back compatibility
    dots <- list(...)
    nmdots <- names(dots)
    if(missing(highlight) && "colcomb" %in% nmdots) {
        if(length(dots$colcomb) && dots$colcomb[1] == "missnonmiss") {
            highlight <- NULL
        } else highlight <- dots$colcomb
    }
    if(missing(labels) && "xaxlabels" %in% nmdots) labels <- dots$xaxlabels
    localWindow <- function(..., colcomb, xaxlabels, log, asp, yaxs) {
        plot.window(..., yaxs=if(is.null(dots$yaxs)) "i" else dots$yaxs)
    }
    localLines <- function(..., colcomb, xaxlabels) lines(...)
    localAxis <- function(..., colcomb, xaxlabels) axis(...)
    localTitle <- function(..., colcomb, xaxlabels) title(...)
    # plot variable names on x-axis?
    x.axis <- TRUE
    if(is.logical(labels)) {
        if(isTRUE(labels)) labels <- NULL
        else x.axis <- FALSE
    }
    # check for infinite values
    iInf <- is.infinite(z)
    for(i in 1:pz) {
        if(any(iInf[, i])) {
            warning(gettextf("variable '%s' contains infinite values", 
                    colnames(z)[i]))
        }
    }
    createPlot <- function() {
        # additional initializations
        showNA <- plotNA && haveNA
        if(showNA){
            yNA <- 1.08
            if(setYlim) ylim <- c(0, yNA)
        } else if(setYlim) ylim <- 0:1
        # find observations with missings in highlight variables
        if(is.null(highlight)) {
			if(!imputed) missh <- isNA(x, selection)
			else missh <- isImp(x, pos = NULL, delimiter = delimiter, imp_var = imp_var, selection = selection)[["missh"]]
		} else {
			if(!imputed) missh <- isNA(x[, highlight, drop=FALSE], selection)
			else missh <- isImp(x[, highlight, drop=FALSE], pos = NULL, delimiter = delimiter, imp_var = imp_var, selection = selection)[["missh"]]
		}
        # initialize plot
        plot.new()
        localWindow(xlim, ylim, ...)
        # get range and transform variables
        sz <- apply(z, 2, 
            function(x) {
                if(!imputed) iNA <- is.na(x)
				else iNA <- isImp(x, pos = NULL, delimiter = delimiter, imp_var = imp_var, selection = "none")[["missh"]]
                s <- if(showNA) ifelse(iNA, yNA, NA) else rep.int(NA, length(x))
                if(any(!iNA)) {
                    iInf <- is.infinite(x)
                    if(any(!iInf)) {
                        r <- range(x[!iInf], na.rm=TRUE)
                        s[!iNA & !iInf] <- (x[!iNA & !iInf]-r[1])/(r[2]-r[1])
                    }
                    s[iInf & x == -Inf] <- 0
                    s[iInf & x == Inf] <- 1
                }
                s
            })
        # plot spearator for NAs in plot variables
        if(showNA) {
            ysep <- (yNA+1)/2
            lines(xlim, rep.int(ysep, 2), col="lightgrey")
            # plot observations with missings only in plot variables
            ind <- which(missz & !missh)
            if(nind <- length(ind)) {
                xobs <- rep.int(c(1:pz, NA), nind)
                szobs <- as.vector(t(cbind(sz[ind, , drop=FALSE], NA)))
                localLines(xobs, szobs, col=col[3], lty=lty[3], xpd=xpd, ...)
            }
            # indices for regular observations
            ind <- which(!missz & !missh)
        } else ind <- which(!missh)
        # plot regular observations
        if(nind <- length(ind)) {
            xobs <- rep.int(c(1:pz, NA), nind)
            szobs <- as.vector(t(cbind(sz[ind, , drop=FALSE], NA)))
            localLines(xobs, szobs, col=col[1], lty=lty[1], xpd=xpd, ...)
        }
        if(showNA) {
            # plot observations with missings in plot and highlight variables
            ind <- which(missz & missh)
            if(nind <- length(ind)) {
                xobs <- rep.int(c(1:pz, NA), nind)
                szobs <- as.vector(t(cbind(sz[ind, , drop=FALSE], NA)))
				if(!imputed) color <- col[4]
				else color <- col[6]
                localLines(xobs, szobs, col=color, lty=lty[4], xpd=xpd, ...)
            }
            # indices for observations with missings only in highlight variables
            ind <- which(!missz & missh)
        } else ind <- which(missh)
        # plot observations with missings only in highlight variables
        if(nind <- length(ind)) {
            xobs <- rep.int(c(1:pz, NA), nind)
            szobs <- as.vector(t(cbind(sz[ind, , drop=FALSE], NA)))
			if(!imputed) color <- col[2]
			else color <- col[5]
            localLines(xobs, szobs, col=color, lty=lty[2], xpd=xpd, ...)
        }
        # plot coordinate axes
        lines(as.vector(rbind(1:pz, 1:pz, NA)), rep.int(c(0,1,NA), pz), 
            col="lightgrey", xpd=xpd)
        # x-axis
        if(x.axis) {
            dots$side <- 1
            dots$at <- 1:pz
            if(is.null(labels)) dots$labels <- colnames(z) 
            else dots$labels <- rep(labels, length.out=pz)
            dots$lty <- 0
            if(is.null(dots$las)) dots$las <- 2
            if(dots$las %in% 2:3) {
                space.vert <- (par("mar")[1]+par("oma")[1]-1)*par("csi")
                ok <- prettyLabels(dots$labels, dots$at, 
                    space.vert, dots$cex.axis)
                if(any(ok)) {
                    dots$at <- dots$at[ok]
                    dots$labels <- dots$labels[ok]
                } else x.axis <- FALSE
            }
        }
        if(x.axis) do.call(localAxis, dots)  # x-axis
        localTitle(main=main, sub=sub, xlab=xlab, ylab=ylab, ...)
    }
    createPlot()
    interactiveDevices <- c("X11","quartz","windows")
    dev <- names(dev.cur())
    if(interactive && any(!is.na(charmatch(interactiveDevices, dev)))) {
        cat(paste("\nClick on a coordinate axis to add to", 
                "or remove from the highlight selection.\n"))
        if(haveNA) {
			if(!imputed) label <- "missing"
			else label <- "imputed missing"
            cat(paste("Click in the top margin to toggle visualizing", 
                    label, " values in the plot variables.\n"))
            cat(paste("To regain use of the VIM GUI and the R console,",
                    "click in any of the other plot margins.\n\n"))
        } else {
            cat(paste("To regain use of the VIM GUI and the R console,",
                    "click outside the plot region.\n\n"))
        }
        # initializations for selection
        cn <- colnames(x)
        if(is.null(highlight)) highlight <- cn
        else if(!is.character(highlight)) highlight <- cn[highlight]
        plotvars <- colnames(z)
        # start interactive session
        highlightInfo(highlight, selection, imputed)  # print out current selection
        usr <- par("usr")
        pt <- locatorVIM(error=TRUE)
        while(!is.null(pt) && class(pt) != "try-error" && 
                max(1, usr[1]) <= pt$x && pt$x < min(px, usr[2]) && 
                max(0, usr[3]) <= pt$y && 
                if(haveNA) TRUE else pt$y <= min(1, usr[4])) {
            if(pt$y <= min(1, usr[4])) {
                # variable selected or deselected
                i <- round(pt$x)
                highlight <- 
                    if(plotvars[i] %in% highlight) 
                        setdiff(highlight, plotvars[i]) 
                    else c(highlight, plotvars[i])
                createPlot()
                highlightInfo(highlight, selection, imputed)  # print out current selection
            } else {
                # toggle separate NA level for missings in plot variables
                plotNA <- !plotNA
                createPlot()
            }
            pt <- locatorVIM(error=TRUE)
        }
        if(inherits(pt, "try-error")) on.exit()
    }
    invisible()
}