# NOTE(review): removed non-R extraction artifact (stray line-number gutter
# listing and a lone "|") that preceded the file header; it was not valid R.
# test.mlr.R: test the "mlr" package with plotmo and plotres
#
# TODO mlr is in maintenance mode, add mlr3 support to plotmo?
# TODO generally, plotres residuals for WrappedModel prob models aren't right
source("test.prolog.R")
library(mlr)
library(plotmo)
library(rpart.plot)
library(earth)
# TODO following function is temporary until mlr package is updated
# Wrapper around mlr::train that also records the call in the returned
# WrappedModel (so plotmo/plotres can recover the training data from it).
# Temporary workaround until the mlr package stores the call itself.
train.with.call <- function(learner, task, subset=NULL, weights=NULL)
{
    model <- train(learner, task, subset=subset, weights=weights)
    model$call <- match.call() # remember how this model was built
    model
}
cat("==simple one variable regression model with earth ===============================\n")
# Fit the same earth model three ways (mlr WrappedModel with a recorded call,
# plain mlr WrappedModel, and a direct earth() call) so the plotmo/plotres
# output for each access path can be compared by eye.
data(trees)
trees1 <- trees[,c("Volume", "Girth")]
task <- makeRegrTask(data=trees1, target="Volume")
lrn <- makeLearner("regr.earth", degree=2)
regr.earth.with.call = train.with.call(lrn, task)
regr.earth = train(lrn, task)
earth <- earth(Volume~., data=trees1, degree=2)
# SHOWCALL is just a testing thing, so we can see who created the plot on the plot itself
plotres(regr.earth.with.call, SHOWCALL=TRUE)
plotres(regr.earth$learner.model, SHOWCALL=TRUE)
plotres(earth, SHOWCALL=TRUE)
plotmo(regr.earth.with.call, trace=1, SHOWCALL=TRUE)
plotmo(regr.earth$learner.model, trace=1, SHOWCALL=TRUE)
plotmo(earth, trace=1, SHOWCALL=TRUE)
# compare partial dependence plots from mlr and plotmo packages
# (same seed before each call so any jittered points match across plots)
set.seed(2018)
plotmo(earth, pmethod="partdep", SHOWCALL=TRUE, col=2, pt.col="darkgray", grid.col="lightgray")
set.seed(2018)
pd <- generatePartialDependenceData(regr.earth, task, "Girth", n=c(50, NA))
print(plotPartialDependence(pd, data = getTaskData(task)))
cat("==test error handling if original data is messed up===========================\n")
par(mfrow=c(4,2), mar=c(1.5,2.5,4,1), oma=c(0,0,0,0))
# deliberately rename the columns of the training data so plotmo can no
# longer find the original predictors by name
colnames(trees1) <- c("nonesuch", "Volume")
plotmo(regr.earth$learner.model, do.par=0, degree1=1, degree2=0, main='colnames(trees1) <- c("nonesuch", "Volume")')
plotmo(regr.earth.with.call, do.par=0, degree1=1, degree2=0)
par(org.par)
# the direct earth model keeps only the data name in its call, so here the
# renamed columns must trigger an error (exact message is checked)
expect.err(try(plotmo(earth, degree1=1, degree2=0)), "cannot get the original model predictors")
cat("==regression model with randomForest (binary response)============================\n")
library(randomForest)
library(earth) # for etitanic data
data(etitanic)
set.seed(2018)
# use a logical subset (since we test for numeric subset elsewhere)
# use a small subset so we can see easily if subset is applied or ignored in plots
train.subset <- rnorm(nrow(etitanic)) > 1 # 166 cases ((16% of 1046 cases))
printf("sum(train.subset) %g (%.0f%% of %g cases)\n", sum(train.subset),
100 * sum(train.subset) / nrow(etitanic), nrow(etitanic))
# regression on the 0/1 survived column (classification variant tested below)
task.regr.rf <- makeRegrTask(data=etitanic, target="survived")
lrn.regr.rf = makeLearner("regr.randomForest")
# same seed before each fit so the mlr-wrapped and direct forests are identical
set.seed(2018)
regr.rf.with.call = train.with.call(lrn.regr.rf, task.regr.rf, subset=train.subset)
set.seed(2018)
rf <- randomForest(survived~., data=etitanic, subset=train.subset)
# sanity check that the models are identical
stopifnot(identical(predict(regr.rf.with.call$learner.model), predict(rf)))
plotres(regr.rf.with.call, info=TRUE, SHOWCALL=TRUE)
# plotres(regr.rf$learner.model, info=TRUE, SHOWCALL=TRUE) # Error: no formula in getCall(object)
plotres(rf, info=TRUE, SHOWCALL=TRUE)
set.seed(2018) # for repeatable jitter in points (specified with pt.col)
plotmo(regr.rf.with.call, pt.col=2, SHOWCALL=TRUE)
# plotmo(regr.rf$learner.model, trace=1, SHOWCALL=TRUE) # Error: no formula in getCall(object)
set.seed(2018)
plotmo(rf, pt.col=2, SHOWCALL=TRUE)
# compare partial dependence plots
# (plotmo vs randomForest::partialPlot vs mlr::plotPartialDependence)
set.seed(2018)
plotmo(regr.rf.with.call, degree1="age", degree2=0, pmethod="partdep",
grid.col="gray", col=2, pt.col="darkgray", SHOWCALL=TRUE)
# function from randomForest package
set.seed(2018)
partialPlot(rf, pred.data=etitanic[train.subset,], x.var="age", n.pt=50, ylim=c(0, 1))
grid()
# function from mlr package
set.seed(2018)
pd <- generatePartialDependenceData(regr.rf.with.call, task.regr.rf, "age", n=c(50, NA))
print(plotPartialDependence(pd, data = getTaskData(task.regr.rf)))
# same comparison but for a factor predictor (pclass)
plotmo(regr.rf.with.call, degree1="pclass", degree2=0, pmethod="partdep", SHOWCALL=TRUE)
set.seed(2018)
# function from randomForest package
set.seed(2018)
partialPlot(rf, pred.data=etitanic[train.subset,], x.var="pclass", n.pt=50, ylim=c(0, 1))
grid()
# TODO following fails
pd <- generatePartialDependenceData(regr.rf.with.call, task.regr.rf, "pclass", n=c(50, NA))
try(print(plotPartialDependence(pd, data = getTaskData(task.regr.rf)))) # Error: Discrete value supplied to continuous scale
cat("==classification model with randomForest (binary response)======================\n")
set.seed(2018)
library(earth) # for etitanic data
data(etitanic)
# work on a copy with survived converted to a two-level factor so that
# makeClassifTask and randomForest treat this as classification
etit <- etitanic
etit$survived <- factor(etit$survived, labels=c("notsurvived", "survived"))
task.classif.rf <- makeClassifTask(data=etit, target="survived")
lrn.classif.rf <- makeLearner("classif.randomForest", predict.type="prob")
set.seed(2018)
# Train the mlr-wrapped classification forest on the same logical subset used
# for the regression forest above.
# NOTE(review): removed a stray empty argument (double comma) from the
# original call; subset is matched by name, so the empty slot only fell
# through to the weights default and behavior is unchanged.
classif.rf.with.call <- train.with.call(lrn.classif.rf, task.classif.rf, subset=train.subset)
set.seed(2018)
# direct randomForest fit for comparison against the mlr-wrapped model
rf <- randomForest(survived~., data=etit, method="class", subset=train.subset)
# sanity check that the models are identical
stopifnot(identical(predict(classif.rf.with.call$learner.model), predict(rf)))
# TODO following causes Error: classif.earth: Setting parameter glm without available description object
# lrn <- makeLearner("classif.earth", degree=2, glm=list(family=binomial))
# TODO residuals on WrappedModel don't match direct call to rf model
set.seed(2018) # for repeatable jitter
plotres(classif.rf.with.call, nresponse="prob.survived", SHOWCALL=TRUE, jitter=2)
set.seed(2018)
plotres(classif.rf.with.call$learner.model, type="prob", SHOWCALL=TRUE, jitter=2)
set.seed(2018)
plotres(rf, type="prob", SHOWCALL=TRUE, jitter=2)
# with multiple response columns and no nresponse, plotmo warns and defaults
# to column 1; with warn=2 that warning becomes a catchable error
options(warn=2) # treat warnings as errors
expect.err(try(plotmo(classif.rf.with.call)), "Defaulting to nresponse=1, see above messages")
options(warn=1)
set.seed(2018) # for repeatable jitter
plotmo(classif.rf.with.call, SHOWCALL=TRUE, nresponse="prob.survived", pt.col=2, trace=2)
set.seed(2018)
plotmo(classif.rf.with.call$learner.model, SHOWCALL=TRUE, type="prob", pt.col=2)
set.seed(2018)
# note that in the following, get.y.shift.scale (in plotmo code) rescales the plotted y to 0..1
plotmo(rf, SHOWCALL=TRUE, type="prob", pt.col="gray")
set.seed(2018)
# in following graph, note that get.y.shift.scale doesn't rescale the plotted y because ylim=c(0,2)
plotmo(rf, SHOWCALL=TRUE, type="prob", ylim=c(0,2), pt.col="gray")
# compare partial dependence plots
set.seed(2018)
plotmo(rf, type="prob", degree1="pclass", degree2=0, pmethod="partdep", pt.col=2, SHOWCALL=TRUE)
set.seed(2018)
plotmo(rf, degree1="pclass", degree2=0, pmethod="partdep", pt.col=2, SHOWCALL=TRUE)
set.seed(2018)
# TODO following fails
pd <- generatePartialDependenceData(classif.rf.with.call, task.classif.rf, "pclass", n=c(50, NA))
try(print(plotPartialDependence(pd, data = getTaskData(task.classif.rf)))) # Error: Discrete value supplied to continuous scale
plotmo(rf, type="prob", nresponse="notsurvived", degree1="age", degree2=0,
pmethod="partdep", ylim=c(.3,.75), nrug=TRUE, grid.col="gray") # looks plausible
set.seed(2018)
pd <- generatePartialDependenceData(classif.rf.with.call, task.classif.rf, "age", n=c(50, NA))
print(plotPartialDependence(pd, data = getTaskData(task.classif.rf)))
cat("==examples from plotmo-notes.pdf ===============================================\n")
# These reproduce the mlr examples in the plotmo package notes, so the test
# output can be compared against the published figures.
#-- Regression model with mlr -------------------------------------------
library(mlr)
library(plotmo)
lrn <- makeLearner("regr.svm")
fit1.with.call <- train.with.call(lrn, bh.task)
fit1 <- train(lrn, bh.task)
# generate partial dependence plots for all variables
# we use "apartdep" and not "partdep" to save testing time
plotmo(fit1.with.call, pmethod="apartdep")
plotmo(fit1$learner.model, pmethod="apartdep")
# generate partial dependence plot for just "lstat"
set.seed(2018) # so slight jitter on pt.col points in plotmo doesn't change across test runs
plotmo(fit1.with.call,
degree1="lstat", # what predictor to plot
degree2=0, # no interaction plots
pmethod="partdep", # generate partial dependence plot
pt.col=2, grid.col="gray", # optional bells and whistles
nrug=TRUE) # rug ticks along the bottom
set.seed(2018) # so slight jitter on pt.col points in plotmo doesn't change across test runs
plotmo(fit1$learner.model,
degree1="lstat", # what predictor to plot
degree2=0, # no interaction plots
pmethod="partdep", # generate partial dependence plot
pt.col=2, grid.col="gray", # optional bells and whistles
nrug=TRUE) # rug ticks along the bottom
# compare to the function provided by the mlr package
set.seed(2018)
pd <- generatePartialDependenceData(fit1, bh.task, "lstat", n=c(50, NA))
print(plotPartialDependence(pd, data = getTaskData(bh.task)))
# # TODO following fails: Error: Discrete value supplied to continuous scale
# pd <- generatePartialDependenceData(fit1, bh.task, "chas", n=c(50, NA))
# plotPartialDependence(pd, data = getTaskData(bh.task))
#-- Classification model with mlr ---------------------------------------
lrn.classif.rpart <- makeLearner("classif.rpart", predict.type = "prob", minsplit = 10)
fit2.with.call <- train.with.call(lrn.classif.rpart, iris.task)
fit2 <- train(lrn.classif.rpart, iris.task)
# generate partial dependence plots for all variables
# TODO plotmo can plot the response for only one class at a time
# note the naming difference: the WrappedModel response column is
# "prob.virginica", the raw rpart model's (with type="prob") is "virginica"
plotmo(fit2.with.call,
nresponse="prob.virginica", # what response to plot
# type="prob", # type gets passed to predict.rpart
pmethod="apartdep") # generate partial dependence plot
plotmo(fit2$learner.model,
nresponse="virginica", # what response to plot
type="prob", # type gets passed to predict.rpart
pmethod="apartdep") # generate partial dependence plot
# generate partial dependence plot for just "Petal.Length"
plotmo(fit2.with.call,
degree1="Petal.Length", # what predictor to plot
degree2=0, # no interaction plots
nresponse="prob.virginica", # what response to plot
# type="prob", # type gets passed to predict.rpart
pmethod="apartdep") # generate partial dependence plot
plotmo(fit2$learner.model,
degree1="Petal.Length", # what predictor to plot
degree2=0, # no interaction plots
nresponse="virginica", # what response to plot
type="prob", # type gets passed to predict.rpart
pmethod="apartdep") # generate partial dependence plot
# compare to the function provided by the mlr package
set.seed(2018)
pd <- generatePartialDependenceData(fit2, iris.task, "Petal.Length", n=c(50, NA))
print(plotPartialDependence(pd, data = getTaskData(iris.task)))
cat("==lda example from mlr documentation, and plotmo error handling =================\n")
set.seed(2018)
data(iris)
task.lda <- makeClassifTask(data=iris, target="Species")
lrn.lda <- makeLearner("classif.lda")
n <- nrow(iris)
# numeric train/test split (the logical-subset case is tested above)
train.set <- sample(n, size=2/3*n)
test.set <- setdiff(1:n, train.set)
classif.lda.with.call <- train.with.call(lrn.lda, task.lda, subset=train.set)
classif.lda <- train(lrn.lda, task.lda, subset=train.set)
iris1 <- iris[train.set, ]
library(MASS)
# direct MASS::lda fit on the same rows, for comparison
lda <- lda(Species~., data=iris1)
# check plotmo/plotres error messages for unsupported types and bad nresponse
# values, on both the WrappedModel and the underlying lda object
# expect.err(try(plotres(classif.lda.with.call)), "plotres does not (yet) support type=\"class\" for \"lda\" objects")
expect.err(try(plotres(classif.lda$learner.model)), "plotres does not (yet) support type=\"class\" for \"lda\" objects")
options(warn=2) # treat warnings as errors
# expect.err(try(plotres(classif.lda.with.call, type="response")), "predict.lda returned multiple columns (see above) but nresponse is not specified")
expect.err(try(plotres(classif.lda$learner.model, type="response")), "Defaulting to nresponse=1, see above messages")
options(warn=1)
expect.err(try(plotres(classif.lda.with.call, type="response", nresponse="nonesuch")), "nresponse=\"nonesuch\" is not allowed")
expect.err(try(plotres(classif.lda$learner.model, type="response", nresponse="nonesuch")), "nresponse=\"nonesuch\" is not allowed")
expect.err(try(plotres(classif.lda.with.call, type="response", nresponse=0)), "nresponse=0 but it should be at least 1")
expect.err(try(plotres(classif.lda$learner.model, type="response", nresponse=0)), "nresponse=0 but it should be at least 1")
expect.err(try(plotres(classif.lda.with.call, type="response", nresponse=99)), "nresponse is 99 but the number of columns is only 1")
expect.err(try(plotres(classif.lda$learner.model, type="response", nresponse=99)), "nresponse is 99 but the number of columns is only 2")
# classif.lda was built with plain train(), so it has no stored call
expect.err(try(plotmo(classif.lda)), "getCall(classif.lda) failed")
expect.err(try(plotres(classif.lda)), "getCall(classif.lda) failed")
# TODO residuals don't match
plotres(classif.lda.with.call, SHOWCALL=TRUE, type="response")
plotres(classif.lda$learner.model, SHOWCALL=TRUE, type="response", nresponse="LD2")
plotres(lda, SHOWCALL=TRUE, type="response", nresponse="LD2")
plotmo(classif.lda.with.call, SHOWCALL=TRUE)
plotmo(classif.lda$learner.model, SHOWCALL=TRUE)
plotmo(lda, SHOWCALL=TRUE)
# # TODO plotPartialDependence and plotmo graphs below don't match
# pd <- generatePartialDependenceData(classif.lda, task.lda, "Petal.Width", n=c(50, NA)) # TODO generates warnings
# print(plotPartialDependence(pd, data = getTaskData(task.lda)))
plotmo(classif.lda.with.call, degree1="Petal.Width", degree2=0, pmethod="partdep", do.par=FALSE)
plotmo(classif.lda.with.call, SHOWCALL=TRUE, all2=TRUE, type="response")
plotmo(classif.lda$learner.model, SHOWCALL=TRUE, all2=TRUE, type="class")
plotmo(lda, SHOWCALL=TRUE, all2=TRUE, type="class")
plotmo(classif.lda$learner.model, SHOWCALL=TRUE, all2=TRUE, type="response", nresponse="LD1")
plotmo(lda, SHOWCALL=TRUE, all2=TRUE, type="response", nresponse="LD1")
cat("==test recursive call to plotmo_prolog for learner.model===============\n")
# Synthetic data where y depends on x3, x4, x5 only, so variable-importance
# handling (which predictors get degree1 plots) is easy to eyeball.
set.seed(2018)
n <- 100
data <- data.frame(
x1 = rnorm(n),
x2 = rnorm(n),
x3 = rnorm(n),
x4 = rnorm(n),
x5 = rnorm(n),
x6 = rnorm(n),
x7 = rnorm(n),
x8 = rnorm(n),
x9 = rnorm(n))
data$y <- sin(data$x3) + sin(data$x4) + 2 * cos(data$x5)
set.seed(2018)
library(gbm)
# reference model
gbm = gbm(y~., data=data, n.trees=300)
plotmo(gbm, trace=-1, SHOWCALL=TRUE)
set.seed(2018)
task <- makeRegrTask(data=data, target="y")
# keep.data=TRUE so the embedded gbm model retains the data plotmo needs
lrn <- makeLearner("regr.gbm", n.trees=300, keep.data=TRUE)
regr.gbm = train.with.call(lrn, task)
plotmo(regr.gbm, trace=-1, SHOWCALL=TRUE)
set.seed(2018)
lrn <- makeLearner("regr.gbm", n.trees=300)
regr.gbm.nokeepdata = train.with.call(lrn, task)
# expect message: use keep.data=TRUE in the call to gbm (cannot determine the variable importances)
plotmo(regr.gbm.nokeepdata, trace=1, SHOWCALL=TRUE)
plotres(regr.gbm, SHOWCALL=TRUE)
cat("==example from makeClassificationViaRegressionWrapper help page ===============\n")
# this tests that plotmo.prolog can access the learner.model at object$learner.model$next.model$learner.model
set.seed(2018)
lrn = makeLearner("regr.rpart")
lrn = makeClassificationViaRegressionWrapper(lrn)
ClassificationViaRegression = train.with.call(lrn, sonar.task, subset = 1:140)
plotmo(ClassificationViaRegression, SHOWCALL=TRUE)
# test.epilog.R restores options/par and reports test results
source("test.epilog.R")
# NOTE(review): removed stray "|" extraction artifact at end of file.