1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250
|
# plotres.R: plot model residuals
# values for which
W1 <- 1 # model selection
W2CUM <- 2 # cumulative distribution
W3RESID <- 3 # residuals vs fitted
W4QQ <- 4 # qq plot
W5ABS <- 5 # abs residuals vs fitted
W6SQRT <- 6 # sqrt abs residuals vs fitted
W7VLOG <- 7 # abs residuals vs log fitted
W8CUBE <- 8 # cube root of the squared residuals vs log fitted
W9LOGLOG <- 9 # log abs residuals vs log fitted
# values for vs
V1FITTED <- 1 # fitted
V2INDEX <- 2 # obs number
V3RESPONSE <- 3 # response
V4LEVER <- 4 # leverages
plotres <- function(object = stop("no 'object' argument"),
which = 1:4,
info = FALSE,
versus = 1,
standardize = FALSE,
delever = FALSE,
level = 0,
id.n = 3,
labels.id = NULL,
smooth.col = 2,
grid.col = 0,
jitter = 0,
do.par = NULL,
caption = NULL,
trace = 0,
npoints = 3000,
center = TRUE,
type = NULL, # passed to predict
nresponse = NA,
object.name = quote.deparse(substitute(object)),
...) # passed to predict
{
init.global.data()
on.exit({init.global.data(); gc()}) # release memory on exit
object # make sure object exists
trace <- as.numeric(check.integer.scalar(trace, logical.ok=TRUE))
use.submodel <- dota("USE.SUBMODEL", DEF=TRUE, ...) # undoc arg (for parsnip models)
use.submodel <- is.specified(use.submodel)
# Associate the model environment with the object.
# (This is instead of passing it as an argument to plotmo's data access
# functions. It saves a few hundred references to model.env in the code.)
object.env <- get.model.env(object, object.name, trace, use.submodel)
ret <- plotmo_prolog(object, object.name, trace, ...)
object <- ret$object # the original object or a submodel (parsnip)
my.call <- ret$my.call
attr(object, ".Environment") <- object.env
if(!is.numeric(which) || !is.vector(which) || anyNA(which) ||
any(which != floor(which)) || any(which < 1) || any(which > W9LOGLOG)) {
which.err()
}
info <- check.boolean(info)
standardize <- check.boolean(standardize)
delever <- check.boolean(delever)
level <- check.level.arg(level, zero.ok=TRUE)
smooth.col <- get.smooth.col(smooth.col, ...)
grid.col <- dota("col.grid", DEF=grid.col, ...)
if(is.specified(grid.col) && is.logical(grid.col) && grid.col) # grid.col=TRUE
grid.col <- "lightgray"
check.integer.scalar(nresponse, min=1, na.ok=TRUE, logical.ok=FALSE, char.ok=TRUE)
temp <- get.plotres.data(object, object.name, which, standardize, delever,
level, versus, id.n, labels.id,
trace, npoints, type, nresponse,
...,
must.get.rsq=info || trace >= 1) # get rsq only if necessary
nresponse <- temp$nresponse # col index in the response (converted from NA if necessary)
resp.name <- temp$resp.name # used only in automatic caption, may be NULL
type <- temp$type # always a string (converted from NULL if necessary)
rinfo <- temp$rinfo # resids, scale, name, etc.
vinfo <- temp$vinfo # versus.mat, icolumns, nversus, etc.
fitted <- temp$fitted # n x 1 numeric matrix, colname is "Fitted"
which <- temp$vinfo$which # plots we don't want will have been removed
id.n <- temp$id.n # forced to zero if row indexing changed
npoints <- temp$npoints # special values have been converted
rsq <- temp$rsq # r-squared on the training data
possible.biglm.warning(object, trace)
nfigs <- length(which) * length(vinfo$icolumns)
if(nfigs == 0) {
if(trace >= 0)
warning0("plotres: nothing to plot")
return(invisible(NULL))
}
do.par <- check.do.par(do.par, nfigs) # do.par is 0, 1, or 2
# Prepare caption --- we need it now for do.par() but
# can only display it later after at least one plot.
caption <- get.caption(nfigs, do.par, caption, resp.name, type,
getCall(object), object.name, my.call)
if(do.par) {
oldpar <- par(no.readonly=TRUE)
do.par(nfigs = nfigs, caption=caption,
main1=NA, # nlines.in.main below explicitly specified below
xlab1 = dota("xlab", DEF=NULL, ...), # used only for margin spacing
ylab1 = dota("ylab", DEF=NULL, ...), # ditto
trace = trace,
nlines.in.main = # nbr of lines in main is needed for margins
nlines.in.plotres.main(object=object, which=which, versus=versus,
standardize=standardize, delever=delever, level=level, ...),
def.font.main = 1, # for compat with lm.plot
...)
if(do.par == 1)
on.exit(par(oldpar), add=TRUE)
} else { # do.par=FALSE
oldpar <- do.par.dots(..., trace=trace)
if(length(oldpar))
on.exit(do.call(par, oldpar), add=TRUE)
}
# force.auto.resids.xlim is for back compat with old versions of earth.
# To pass ylim to the w1 plot, use a w1. prefix, just like any other arg.
# So plain ylim gets passed to the residuals plot not the w1 plot.
# But for backwards compatibility when the w1 plot is
# an earth model pass plain ylim to the w1 plot unless w1.ylim is set
force.auto.resids.xlim <-
length(which) > 1 && (W1 %in% which) && inherits(object, "earth") &&
!is.dot("w1.xlim", ...)
force.auto.resids.ylim <-
length(which) > 1 && (W1 %in% which) && inherits(object, "earth") &&
!is.dot("w1.ylim", ...)
w1.retval <- list(plotted=FALSE, retval=NULL)
w3.retval <- NULL
attempted.w1.plot <- FALSE
if(any(which == W1)) {
w1.retval <- plot_w1(object=object, which=which, info=info,
standardize=standardize, delever=delever, level=level, versus=versus,
id.n=id.n, labels.id=rinfo$labs, smooth.col=smooth.col,
grid.col=grid.col, do.par=do.par,
# must do caption here if will not call plot1 later
caption=if(all(which == W1)) caption else "", trace=trace,
npoints=npoints, center=center, type=type, nresponse=nresponse,
object.name=object.name,
...)
attempted.w1.plot <- TRUE
which <- which[which != W1]
if(length(which) == 0 && !w1.retval$plotted && trace >= 0)
warning0("plotres: nothing to plot")
}
if(length(which) == 0) # nothing more to plot?
return(invisible(if(attempted.w1.plot) w1.retval$retval else w3.retval))
# we do this after the w1 call so we pass NULL to w1 if labels.id were NULL
if(is.null(rinfo$labs))
rinfo$labs <- paste(1:length(rinfo$resids))
# We plot only the residuals in iresids, but use all the
# residuals for calculating densities (where "all" actually means
# a maximum of NMAX cases, see the previous call to get.isubset).
#
# The "use.all=(nversus == V4LEVER)" keeps things easier later
# for leverage plots, but it would be nice to not have to use it.
iresids <- get.isubset(rinfo$resids, npoints, id.n,
use.all=(vinfo$nversus == V4LEVER), rinfo$scale)
xlim <- dota("xlim", DEF=NULL, ...) # TODO what is this?
for(icolumn in vinfo$icolumns) {
for(iwhich in seq_along(which)) {
if(which[iwhich] == W2CUM)
plotmo_cum(rinfo=rinfo, info=info, nfigs=nfigs, add=FALSE,
cum.col1=NA, grid.col=grid.col, jitter=0, ...)
else if(which[iwhich] == W4QQ)
plotmo_qq(rinfo=rinfo, info=info, nfigs=nfigs,
grid.col=grid.col, smooth.col=smooth.col,
id.n=id.n, iresids=iresids, npoints=npoints,
force.auto.resids.ylim=force.auto.resids.ylim,
...)
else
w3.retval <- plotresids(object=object, which=which[iwhich],
info=info, standardize=standardize, level=level,
# versus1 is what we plot along the x axis, a vector
versus1=vinfo$versus.mat[, icolumn],
id.n=id.n, smooth.col=smooth.col,
grid.col=grid.col, jitter=jitter,
npoints=npoints, center=center,
type=type,
fitted=fitted, rinfo=rinfo, rsq=rsq, iresids=iresids,
nversus=vinfo$nversus,
colname.versus1=colnames(vinfo$versus.mat)[icolumn],
force.auto.resids.xlim=force.auto.resids.xlim,
force.auto.resids.ylim=force.auto.resids.ylim,
...)
}
}
draw.caption(caption, ...)
if(trace >= 1)
printf("\ntraining rsq %.2f\n", rsq)
invisible(if(attempted.w1.plot) w1.retval$retval else w3.retval)
}
which.err <- function()
{
stop0("Bad value for which\n",
"Allowed values for which:\n",
" 1 Model\n",
" 2 Cumulative distribution\n",
" 3 Residuals vs fitted\n",
" 4 QQ plot\n",
" 5 Abs residuals vs fitted\n",
" 6 Sqrt abs residuals vs fitted\n",
" 7 Abs residuals vs log fitted\n",
" 8 Cube root of the squared residuals vs log fitted\n",
" 9 Log abs residuals vs log fitted")
}
versus.err <- function()
{
stop0("versus must be an integer or a string:\n",
" 1 fitted (default)\n",
" 2 observation numbers\n",
" 3 response\n",
" 4 leverages\n",
" \"\" predictors\n",
" \"b:\" basis functions")
}
nlines.in.plotres.main <- function(object, which, versus,
standardize, delever, level, ...)
{
w1.does.own.mar4 <- # these models do their own top margin spacing in w1 plot
inherits(object, c("gbm", "GBMFit", "glmnet", "multnet"))
auto.main.has.extra.line <- # conservative guess if main will have two lines
standardize || delever || level ||
any(which %in% W6SQRT:W9LOGLOG) ||
(versus %in% V4LEVER) || is.character(versus)
max(1 + auto.main.has.extra.line,
nlines(dota("main", ...)),
1 + if(w1.does.own.mar4) 0 else nlines(dota("w1.main", ...)))
}
|