1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50
|
# Smoke test: a learner wrapped by makeOverBaggingWrapper can be run through
# resample() and the aggregated performance it reports is not NA.
test_that("OverBagging wrapper", {
  base.lrn = makeLearner("classif.rpart")
  bagged.lrn = makeOverBaggingWrapper(base.lrn, obw.rate = 2)
  # 2-fold cross-validation keeps the test cheap.
  cv.desc = makeResampleDesc("CV", iters = 2)
  res = resample(bagged.lrn, binaryclass.task, cv.desc)
  expect_true(!is.na(res$aggr))
})
# Argument validation: constructing the wrapper with obw.rate = 0.5 is expected
# to raise an error (presumably because the rate must not be below 1 -- confirm
# against makeOverBaggingWrapper). No task is needed, since the error must
# already occur when the wrapper is constructed.
test_that("OverBagging wrapper arg check works", {
  lrn1 = makeLearner("classif.rpart")
  expect_error(makeOverBaggingWrapper(lrn1, obw.rate = 0.5))
})
# Trains an overbagging wrapper (obw.rate = 5, obw.iters = 3) and, for every
# fitted model, compares the class counts of that model's training subset with
# the class counts of the full data set:
#   - the count of class "M" must equal the original count,
#   - the count of class "R", divided by 5 and rounded, must equal the
#     original count.
test_that("oversampling in each bag works", {
  orig.counts = table(binaryclass.df[, binaryclass.target])
  task = makeClassifTask(data = binaryclass.df, target = binaryclass.target)
  base.lrn = makeLearner("classif.rpart")
  bagged.lrn = makeOverBaggingWrapper(base.lrn, obw.rate = 5, obw.iters = 3)
  fitted = train(bagged.lrn, task)
  bags = getLearnerModel(fitted)
  # check min class size gets increased by rate/factor 5
  for (i in seq_along(bags)) {
    # Rebuild the data this bag was trained on from its stored subset.
    bag.data = getTaskData(task, bags[[i]]$subset)
    bag.counts = table(bag.data[, binaryclass.target])
    expect_equal(orig.counts["M"], bag.counts["M"])
    expect_equal(orig.counts["R"], round(bag.counts["R"] / 5))
  }
})
# Trains an overbagging wrapper (obw.rate = 5, obw.iters = 3) whose obw.cl is
# set to the class that getMinMaxClass() reports as max.name, then checks the
# class counts of EVERY fitted model's training subset against the full data:
#   - the count of class "R" must equal the original count,
#   - the count of class "M", divided by 5 and rounded, must equal the
#     original count.
test_that("oversampling bigger class works", {
  y = binaryclass.df[, binaryclass.target]
  z = getMinMaxClass(y)
  tab1 = table(y)
  task = makeClassifTask(data = binaryclass.df, target = binaryclass.target)
  lrn1 = makeLearner("classif.rpart")
  lrn2 = makeOverBaggingWrapper(lrn1, obw.rate = 5, obw.iters = 3, obw.cl = z$max.name)
  mod = train(lrn2, task)
  models = getLearnerModel(mod)
  for (i in seq_along(models)) {
    # Index with i (not a fixed 1) so that each bag is verified, not just the
    # first one repeatedly.
    data = getTaskData(task, models[[i]]$subset)
    tab = table(data[, binaryclass.target])
    expect_equal(tab1["R"], tab["R"])
    expect_equal(tab1["M"], round(tab["M"] / 5))
  }
})
|