File: test_base_downsample.R

Package: r-cran-mlr 2.19.1+dfsg-1

test_that("downsample", {
  down.tsk = downsample(multiclass.task, perc = 1 / 3)
  expect_equal(getTaskSize(down.tsk), 50L)
  rsm.methods = c("Bootstrap", "Subsample", "Holdout")
  for (rsm.method in rsm.methods) {
    rin = makeResampleInstance(rsm.method, task = binaryclass.task)
    rin2 = downsample(rin, perc = 0.5)
    sapply(seq_along(rin$train.inds), function(i) {
      expect_equal(
        length(rin2$train.inds[[i]]),
        length(rin$train.inds[[i]]) / 2
      )
    })
  }
})
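
# Illustrative sketch, not part of the original test file: downsample() also has a
# stratify argument (default FALSE) that, for classification tasks, keeps the class
# proportions while subsampling; the test above relies on the default. Wrapped in
# if (FALSE) so it never runs as part of the suite.
if (FALSE) {
  down.strat = downsample(multiclass.task, perc = 1 / 3, stratify = TRUE)
  table(getTaskTargets(down.strat)) # class counts should shrink roughly proportionally
}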

test_that("downsample wrapper", {
  # test it with classif
  rdesc = makeResampleDesc("CV", iters = 2)
  lrn = makeDownsampleWrapper("classif.rpart", dw.perc = 0.5)
  r = resample(lrn, binaryclass.task, rdesc)
  expect_true(!is.na(r$aggr))

  # test it with regr
  rdesc = makeResampleDesc("CV", iters = 2)
  lrn = makeDownsampleWrapper("regr.rpart", dw.perc = 0.5)
  r = resample(lrn, regr.task, rdesc)
  expect_true(!is.na(r$aggr))
})
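
# Illustrative sketch, not part of the original test file: the wrapper can also be
# used with train()/predict() directly instead of resample(); only the training data
# is downsampled, while prediction still sees the full task. Wrapped in if (FALSE)
# so it never runs as part of the suite.
if (FALSE) {
  lrn = makeDownsampleWrapper("classif.rpart", dw.perc = 0.5)
  mod = train(lrn, binaryclass.task)
  performance(predict(mod, binaryclass.task)) # mmce of rpart trained on half the data
}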

test_that("downsample wrapper works with xgboost, we had issue #492", {
  skip_if_not_installed("xgboost") # xgboost broken on CRAN, they cannot run our tests
  rdesc = makeResampleDesc("CV", iters = 2)
  lrn = makeDownsampleWrapper("classif.xgboost", dw.perc = 0.5)
  expect_output(print(lrn), "down")
  r = resample(lrn, binaryclass.task, rdesc)
  expect_true(!is.na(r$aggr))
})

test_that("downsample wrapper works with weights, we had issue #838", {
  n = nrow(regr.df)
  w = 1:n
  task = makeRegrTask(data = regr.df, target = regr.target, weights = w)

  # weights from task, use all
  lrn = makeDownsampleWrapper("regr.__mlrmocklearners__6", dw.perc = 1)
  m = train(lrn, task)
  expect_set_equal(getLearnerModel(m, more.unwrap = TRUE)$weights, w)

  # weights from task, really downsample
  lrn = makeDownsampleWrapper("regr.__mlrmocklearners__6", dw.perc = 0.5)
  m = train(lrn, task)
  u = getLearnerModel(m, more.unwrap = TRUE)$weights
  expect_equal(length(u), n / 2)
  expect_subset(u, w)

  # weights from train
  lrn = makeDownsampleWrapper("regr.__mlrmocklearners__6", dw.perc = 0.5)
  m = train(lrn, task, subset = 11:20, weights = 1:10)
  u = getLearnerModel(m, more.unwrap = TRUE)$weights
  expect_equal(length(u), 5)
  expect_subset(u, 1:10)
})

test_that("training performance works as expected (#1357)", {
  num = makeMeasure(id = "num", minimize = FALSE,
    properties = c("classif", "classif.multi", "req.pred", "req.truth"),
    name = "Number",
    fun = function(task, model, pred, feats, extra.args) {
      length(pred$data$response)
    }
  )

  rdesc = makeResampleDesc("Holdout", predict = "both")
  lrn = makeDownsampleWrapper("classif.rpart", dw.perc = 0.1)
  r = resample(lrn, multiclass.task, rdesc, measures = list(setAggregation(num, train.mean)))
  # train-set predictions must come from the downsampled data, i.e. at most 10% of the task
  expect_lte(r$measures.train$num, getTaskSize(multiclass.task) * 0.1)
})