test_that("performance", {
res = makeResampleDesc("Holdout")
lrn = makeLearner("classif.rpart")
rf = resample(lrn, task = binaryclass.task, resampling = res,
measures = list(acc))
expect_true(all(rf$aggr > 0))
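
  # the same check with bootstrap resampling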
res = makeResampleDesc("Bootstrap", iters = 3L)
rf = resample(lrn, task = binaryclass.task, resampling = res,
measures = list(acc))
expect_true(all(rf$aggr > 0))
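
  # aggregate accuracy with the median over the resampling iterations
  # instead of the default test.mean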
  m = setAggregation(acc, test.median)
  rf = resample(lrn, task = binaryclass.task, resampling = res, measures = m)
  expect_true(all(rf$aggr > 0))

  # custom measure: check that the prediction ids map back to the task targets
  res = makeResampleDesc("CV", iters = 3)
  mymeasure = makeMeasure(id = "mym", minimize = TRUE,
    properties = c("classif", "classif.multi", "predtype.response"),
    fun = function(task, model, pred, feats, extra.args) {
      # normal test error, taken directly from the prediction object
      e1 = mean(pred$data$truth != pred$data$response)
      # recompute it manually by indexing the task targets with the prediction ids
      id = pred$data$id
      t2 = getTaskTargets(task)[id]
      e2 = mean(t2 != pred$data$response)
      expect_equal(e1, e2)
      0
    }
  )
  r = resample(lrn, task = binaryclass.task, resampling = res,
    measures = mymeasure)
  expect_true(r$aggr >= 0)
  perf = performance(r$pred, measures = mymeasure, task = binaryclass.task)
  expect_true(perf >= 0)

  # multiple measures as list
  res = performance(r$pred, measures = list(ber, acc, tp),
    task = binaryclass.task)
  expect_true(!any(is.na(res)))
  expect_true(length(res) == 3)
  expect_equal(names(res), c("ber", "acc", "tp"))

  # custom resampled measure, aggregated under the id "train.mean"
  mymeasure = makeCustomResampledMeasure(measure.id = "mym",
    aggregation.id = "train.mean",
    properties = c("classif", "predtype.response"),
    fun = function(task, group, pred, feats, extra.args) {
      mean(pred$data$truth != pred$data$response)
    }
  )
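  # with a single holdout split the custom measure pools exactly the test-set
  # predictions, so its value should equal mmce.test.mean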
  rdesc = makeResampleDesc("Holdout")
  r = resample(lrn, binaryclass.task, rdesc, measures = list(mmce, mymeasure))
  expect_equal(as.numeric(r$aggr["mmce.test.mean"]),
    as.numeric(r$aggr["mym.train.mean"]))
})
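
# auc cannot be computed from pure response predictions; the option
# 'on.measure.not.applicable' controls whether mlr returns NA quietly,
# warns, or stops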
test_that("performance is NA if 'on.measure.not.applicable' is not 'stop'", {
default = getMlrOption("on.measure.not.applicable")
vals = c("quiet", "warn", "stop")
for (i in vals) {
configureMlr(on.measure.not.applicable = i)
lrn = makeLearner("classif.qda", predict.type = "response")
mod = train(lrn, binaryclass.task)
pred = predict(mod, binaryclass.task)
if (i == "quiet") {
expect_equal(unname(performance(pred, auc)), NA_real_)
# does this also work with benchmark?
b = benchmark(lrn, binaryclass.task, measures = list(acc, auc),
resamplings = cv5)
expect_true(any(is.na(as.data.frame(b)$auc)))
expect_false(any(is.na(as.data.frame(b)$acc)))
    } else if (i == "warn") {
      expect_warning(expect_equal(unname(performance(pred, auc)), NA_real_))
      # does this also work with benchmark?
      suppressWarnings(
        expect_warning({
          b = benchmark(lrn, binaryclass.task, measures = list(acc, auc),
            resamplings = cv5)
        })
      )
      expect_true(any(is.na(as.data.frame(b)$auc)))
      expect_false(any(is.na(as.data.frame(b)$acc)))
    } else {
      expect_error(performance(pred, auc))
      # does this also work with benchmark?
      expect_error(benchmark(lrn, binaryclass.task,
        measures = list(acc, auc),
        resamplings = cv3))
    }
  }
  configureMlr(on.measure.not.applicable = default)
})
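
# a prediction on raw newdata has no truth column, so performance() has
# nothing to compare against and must error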
test_that("performance checks for missing truth col", {
lrn = makeLearner("classif.rpart", predict.type = "prob")
m = train(lrn, binaryclass.task)
test.x = getTaskData(binaryclass.task, target.extra = TRUE)$data
pred = predict(m, newdata = test.x)
expect_error(performance(pred, measures = mmce), "need to have a 'truth' col")
})
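
# auc requires probability predictions; a learner left at the default
# response predict type should be rejected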
test_that("performance checks for req prob type", {
lrn = makeLearner("classif.rpart")
expect_error(holdout(lrn, binaryclass.task, measures = auc),
"predict type to be: 'prob'")
})
test_that("performance works with ResamplePrediction", {
lrn = makeLearner("classif.lda", predict.type = "prob")
res = makeResampleDesc("Bootstrap", iters = 5L, predict = "both")
rf = resample(lrn, task = binaryclass.task, resampling = res, mmce)
expect_true(rf$aggr > 0)
expect_true(rf$aggr < 1)
perf = performance(rf$pred)
expect_true(perf > 0)
expect_true(perf < 1)
# FIXME: names for measures are different for aggregated measures, which we
# currently don't do because it breaks other stuff
rf$aggr = setNames(rf$aggr, names(perf))
expect_equal(rf$aggr, perf)
})