1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244
|
# grid.R: functions for creating the grid of values to be plotted in plotmo graphs
# Get the x matrix (actually a data.frame) with median values (or first level
# for factors), ngrid1 rows, all rows identical, nrow(xgrid) is ngrid1.
get.degree1.xgrid <- function(x, grid.func, grid.levels, pred.names, ngrid1)
{
stopifnot(!is.null(pred.names))
check.grid.levels.arg(x, grid.levels, pred.names)
xgrid <- data.frame(matrix(0, ngrid1, ncol(x), byrow=TRUE))
for(ipred in seq_len(ncol(x)))
xgrid[[ipred]] <- get.fixed.gridval(x[[ipred]], pred.names[ipred],
grid.func, grid.levels)
warn.if.not.all.finite(xgrid, "'xgrid' for degree1 plots")
colnames(xgrid) <- pred.names
xgrid
}
# Update xgrid for the predictor currently being plotted.
# That is, replace this predictor's column with a range of values.
# For factors or discrete variables, we shorten the frame to match the nbr of levels.
get.degree1.xframe <- function(xgrid, x, ipred, ngrid1,
ndiscrete, ux.list, extend, mean)
{
x1 <- x[[ipred]]
# uxlist is a list, each elem is the unique levels for corresponding column of x
u1 <- ux.list[[ipred]]
if(is.factor(x1) && length(u1) > ngrid1)
stop0("ngrid1=", ngrid1, " is less than the number ",
"of levels ", length(u1), " in '",
colnames(x)[ipred],
"'\n Workaround: call plotmo with ngrid1=", length(u1))
if(is.factor(x1) || is.logical(x1) || length(u1) <= ndiscrete) {
levels <- get.all.levs(x1, u1)
xframe <- xgrid[1:length(levels), , drop=FALSE] # shorten xframe
xframe[[ipred]] <- levels
} else {
xframe <- xgrid
xrange <- range1(x1)
if(extend != 0) {
# extend the range of x (TODO consider allowing extend with discrete vars)
stopifnot(xrange[2] >= xrange[1])
ext <- extend * (xrange[2] - xrange[1])
xrange[1] <- xrange[1] - ext
xrange[2] <- xrange[2] + ext
}
xval <- seq(from=xrange[1], to=xrange[2], length.out=ngrid1)
# # following commented out because it causes cliffs to slope more than necessary
# # e.g. test.fac.R plotmo(rpart(survived ~ pclass.num+parch.int, data=et))
# if(is.integer(x1)) {
# xval <- unique(as.integer(xval))
# if(length(xval) < ngrid1)
# xframe <- xframe[1:length(xval), , drop=FALSE] # shorten xframe
# }
xframe[[ipred]] <- xval
}
xframe
}
# We want to display discrete variables in degree1 plots as quantized.
# (Factors get handled elsewhere.) So if a variable is discrete, then
# modify the xframe and yhat to do so. For example, an xframe that was
#
# pclass yhat
# 1 1.1
# 2 2.2
# 3 3.3
#
# becomes
#
# pclass yhat
# 1.0 1.1
# 1.5 1.1
# 1.5 2.2
# 2.5 2.2
# 2.5 3.3
# 3.0 3.3
blockify.degree1.frame <- function(xframe, yhat, intervals,
ipred, ux.list, ndiscrete)
{
u1 <- ux.list[[ipred]]
# TODO the integral check is necessary for compatibility with blockify.degree2.frame
# (the code here can handle non integers but the code in blockify.degree2.frame can't)
if(length(u1) <= ndiscrete && !is.factor(xframe[[ipred]]) &&
!inherits(u1, "Date") && is.integral(u1)) {
# discrete, so duplicate each elem in yhat
yhat <- rep(yhat, each=2)
if(!is.null(intervals)) {
new.intervals <- data.frame(
lwr = rep(intervals$lwr, each=2),
upr = rep(intervals$upr, each=2))
if(!is.null(intervals$fit))
new.intervals$fit <- rep(intervals$fit, each=2)
if(!is.null(intervals$cint.lwr)) {
new.intervals$cint.lwr <- rep(intervals$cint.lwr, each=2)
new.intervals$cint.upr <- rep(intervals$cint.upr, each=2)
}
intervals <- new.intervals
}
# duplicate each row of xframe, except the first and last row
xframe <- xframe[rep(seq_len(nrow(xframe)), each=2), , drop=FALSE]
if(nrow(xframe) >= 4) {
x1 <- xframe[[ipred]]
for(i in seq(2, length(x1)-1, by=2))
x1[i] <- x1[i+1] <- (x1[i] + x1[i+1]) / 2
xframe[[ipred]] <- x1
}
}
list(xframe=xframe, yhat=yhat, intervals=intervals)
}
# Get the x matrix (actually a data.frame) to plot in degree2 plots.
# Each row of xgrid is identical (the medians).
get.degree2.xgrid <- function(x, grid.func, grid.levels, pred.names, ngrid2)
{
check.grid.levels.arg(x, grid.levels, pred.names)
xgrid <- list(ncol(x))
for(ipred in seq_len(ncol(x)))
xgrid[[ipred]] <- get.fixed.gridval(x[[ipred]], pred.names[ipred],
grid.func, grid.levels)
warn.if.not.all.finite(xgrid, "'xgrid' for degree2 plots")
xgrid <- as.data.frame(xgrid)
colnames(xgrid) <- pred.names
xgrid[seq_len(ngrid2^2), ] <- xgrid
xgrid
}
# Update xgrid for the predictor pair currently being plotted (ipred1
# and ipred2 are column numbers in x). That is, replace two columns
# with a range of values.
#
# This will also shorten xgrid if possible (i.e. if predictor is discrete
# with number of discrete values less than ngrid2, typically because
# predictor is a factor.) This shortening is for efficiency later,
# because it means we avoid duplicate cases in xgrid.
get.degree2.xframe <- function(xgrid, x, ipred1, ipred2,
ngrid2, xranges, ux.list, ndiscrete)
{
ret1 <- get.degree2.xframe.aux(xgrid, x, ipred1,
ngrid2, xranges, ux.list, ndiscrete)
ret2 <- get.degree2.xframe.aux(xgrid, x, ipred2,
ngrid2, xranges, ux.list, ndiscrete)
# pack x1grid and x2grid into xgrid
if(ret1$n != ngrid2 || ret2$n != ngrid2)
xgrid <- xgrid[1:(ret1$n * ret2$n), , drop=FALSE] # shorten xgrid
xgrid[[ipred1]] <- ret1$xgrid # will recycle
xgrid[[ipred2]] <- rep(ret2$xgrid, each=ret1$n)
list(xframe=xgrid, x1grid=ret1$xgrid, x2grid=ret2$xgrid)
}
get.degree2.xframe.aux <- function(xgrid, x, ipred1,
ngrid2, xranges, ux.list, ndiscrete)
{
n1 <- ngrid2 # will change if ipred1 is discrete
u1 <- ux.list[[ipred1]]
nlevs1 <- length(u1)
if(is.factor(x[[ipred1]]) && nlevs1 > ngrid2)
stop0("ngrid2=", ngrid2, " is less than the number",
" of levels ", nlevs1, " in '",
colnames(x)[ipred1],
"'\n Workaround: call plotmo with ngrid2=", length(u1))
x1 <- x[[ipred1]]
x1grid <-
if(is.factor(x1) || is.logical(x1) || nlevs1 <= ndiscrete) { # discrete?
n1 <- nlevs1
x1grid <- get.all.levs(x1, u1)
} else
seq(from=xranges[1,ipred1], to=xranges[2,ipred1], length.out=ngrid2)
if(is.integer(x1)) {
x1grid <- unique(as.integer(x1grid))
n1 <- length(x1grid)
}
list(xgrid=x1grid, n=n1)
}
# we want to draw discrete variables in persp and contour plots using "blocks"
blockify.degree2.frame <- function(x, yhat, x1grid, x2grid,
ipred1, ipred2, ux.list, ndiscrete)
{
is.discrete2 <- function(ipred, x1grid)
{
if(is.factor(x[[ipred]]))
return(TRUE)
u1 <- ux.list[[ipred]]
# the integral check is necessary with the current
# implementation which adds/subtracts a hardcoded .499
# TODO make this like blockify.degree1.frame (which can handle non integers)
length(u1) <= ndiscrete && is.integral(x1grid)
}
if(is.discrete2(ipred1, x1grid)) {
yhat <- rep(yhat, each=2) # duplicate each elem in yhat
x1grid <- rep(x1grid, each=2) # duplicate each elem in x1grid
is.even <- (1:length(x1grid)) %% 2 == 0
x1grid[!is.even] <- x1grid[!is.even] - .499 # sub .5 from odd elems
x1grid[is.even] <- x1grid[is.even] + .499 # add .5 to even elems
}
if(is.discrete2(ipred2, x2grid)) {
# duplicate each block in yhat (each block has n1 elements)
y.old <- yhat
yhat <- double(2 * length(yhat))
n1 <- length(x1grid)
for(i in 1:length(x2grid)) {
start <- n1 * (i-1)
end <- n1 * i
yhat[(2 * start + 1): (2 * end)] <- y.old[(start + 1): end]
}
x2grid <- rep(x2grid, each=2) # duplicate each elem in x2grid
is.even <- (1:length(x2grid)) %% 2 == 0
x2grid[!is.even] <- x2grid[!is.even] - .499 # sub .5 from odd elems
x2grid[is.even] <- x2grid[is.even] + .499 # add .5 to even elems
}
list(yhat=yhat, x1grid=x1grid, x2grid=x2grid)
}
# if x is a factor
# return a factor vector with nlevs elements, e.g. pclass1, pclass2, pclass3.
# else
# return a vector with all unique values in x, e.g. 1,2,3 or FALSE, TRUE
get.all.levs <- function(x, levels)
{
if(!is.factor(x))
return(levels)
# TODO Sanity check, quite expensive, make sure no gaps in factor coding
# Could remove this if convert levels to factors in a better way below?
range <- range(as.numeric(x), na.rm=TRUE)
if(range[1] < 1 || range[2] > length(levels))
stop0("internal error: illegal factor range ", range[1], " ", range[2],
" for levels ", quotify(levels))
if(is.ordered(x))
ordered(1:length(levels), labels=levels)
else
factor(1:length(levels), labels=levels)
}
# Print the grid values, must do some finagling for a nice display
print_grid_values <- function(xgrid, trace)
{
trace1(trace, "\n") # extra space when tracing
row <- xgrid[1, , drop=FALSE]
names(row) <- c(paste("plotmo grid: ", names(row)[1]), names(row)[-1])
rownames(row) <- ""
print(row)
trace1(trace, "\n")
}
|