File: test_base_clustering.R

package info (click to toggle)
r-cran-mlr 2.19.1%2Bdfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 8,392 kB
  • sloc: ansic: 65; sh: 13; makefile: 5
file content (119 lines) | stat: -rwxr-xr-x 3,435 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119

test_that("clustering predict", {

  # RWeka causes problems
  skip_on_cran()

  lrn = makeLearner("cluster.cmeans", predict.type = "prob")
  model = train(lrn, noclass.task)
  pred = predict(model, task = noclass.task)
  y = pred$data$response
  expect_true(is.integer(y))

  p = getPredictionProbabilities(pred)
  expect_true(is.data.frame(p) && nrow(noclass.df) && ncol(p) == max(y))
})


test_that("clustering performance", {
  requirePackagesOrSkip("clusterSim", default.method = "load")

  # RWeka causes problems
  skip_on_cran()
  # RWeka not avail
  skip_on_os("windows")

  lrn = makeLearner("cluster.SimpleKMeans")
  model = train(lrn, noclass.task)
  pred = predict(model, task = noclass.task)

  expect_true(is.numeric(performance(pred, task = noclass.task,
    measures = db)))
  expect_true(is.numeric(performance(pred, task = noclass.task,
    measures = G1)))
  expect_true(is.numeric(performance(pred, task = noclass.task,
    measures = G2)))
  expect_true(is.numeric(performance(pred, task = noclass.task,
    measures = silhouette)))
})

test_that("clustering performance with missing clusters", {
  requirePackagesOrSkip("clusterSim", default.method = "load")

  # RWeka causes problems
  skip_on_cran()
  # RWeka not avail
  skip_on_os("windows")

  lrn = makeLearner("cluster.SimpleKMeans")
  model = train(lrn, noclass.task)
  pred = predict(model, task = noclass.task)
  pred$data$response = sample(c(1, 3, 4), length(pred$data$response),
    replace = TRUE)

  expect_warning(performance(pred, task = noclass.task, measures = db), NA)
  expect_warning(performance(pred, task = noclass.task, measures = G1), NA)
  expect_warning(performance(pred, task = noclass.task, measures = G2), NA)
  expect_warning(performance(pred, task = noclass.task, measures = silhouette),
    NA)
})

test_that("clustering resample", {
  requirePackagesOrSkip("clusterSim", default.method = "load")

  # RWeka causes problems
  skip_on_cran()
  # RWeka not avail
  skip_on_os("windows")

  rdesc = makeResampleDesc("Subsample", split = 0.3, iters = 2)
  lrn = makeLearner("cluster.SimpleKMeans")
  res = resample(lrn, noclass.task, rdesc)

  expect_true(all(!is.na(res$measures.test)))
  expect_false(is.na(res$aggr))
})

test_that("clustering benchmark", {
  requirePackagesOrSkip("clusterSim", default.method = "load")

  # RWeka causes problems
  skip_on_cran()
  # RWeka not avail
  skip_on_os("windows")

  task.names = "noclass"
  tasks = list(noclass.task)
  learner.names = "cluster.SimpleKMeans"
  learners = lapply(learner.names, makeLearner)
  rin = makeResampleDesc("CV", iters = 2L)

  res = benchmark(learners = learners, task = tasks,
    resamplings = makeResampleDesc("CV", iters = 2L))
  expect_true("BenchmarkResult" %in% class(res))
})

test_that("clustering downsample", {
  down.tsk = downsample(noclass.task, perc = 1 / 3)
  expect_equal(getTaskSize(down.tsk), 50L)
})

test_that("clustering tune", {
  requirePackagesOrSkip("clusterSim", default.method = "load")

  # RWeka causes problems
  skip_on_cran()
  # RWeka not avail
  skip_on_os("windows")

  lrn = makeLearner("cluster.SimpleKMeans")
  rdesc = makeResampleDesc("Holdout")
  ps = makeParamSet(
    makeIntegerParam("N", lower = 2, upper = 10)
  )

  ctrl = makeTuneControlRandom(maxit = 2)
  tr = tuneParams(lrn, noclass.task, rdesc, par.set = ps, control = ctrl)
  expect_equal(getOptPathLength(tr$opt.path), 2)
  expect_true(!is.na(tr$y))
})