File: test_base_getTaskData.R

package info (click to toggle)
r-cran-mlr 2.19.1+dfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 8,392 kB
  • sloc: ansic: 65; sh: 13; makefile: 5
file content (90 lines) | stat: -rwxr-xr-x 3,797 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90

# Checks getTaskData() on the classification fixtures: full extraction,
# row/feature subsetting, target recoding ("01", "-1+1", "multilabel.factor"),
# target.extra, and feature selection by integer index.
# All *.task, *.df, *.target and *.inds objects are shared fixtures defined
# outside this file.
test_that("getTaskData", {
  # no arguments: the result must equal the multiclass.df fixture
  df = getTaskData(multiclass.task)
  expect_equal(df, multiclass.df)
  # a feature subset must still carry the target column; the expected columns
  # are addressed by name rather than by a hard-coded position
  feats = colnames(multiclass.df)[1:2]
  df = getTaskData(multiclass.task, subset = 1:10, features = feats)
  expect_equal(df, multiclass.df[1:10, c(feats, multiclass.target)])

  # recode.target = "01": target becomes numeric, positive class -> 1,
  # negative class -> 0; the checked feature columns stay untouched
  td = getTaskDesc(binaryclass.task)
  df = getTaskData(binaryclass.task, recode.target = "01")
  expect_equal(df[, 1:20], binaryclass.df[, 1:20])
  expect_true(is.numeric(df[, binaryclass.target]))
  expect_equal(sum(df[, binaryclass.target] == 1),
    sum(binaryclass.df[, binaryclass.target] == td$positive))
  expect_equal(sum(df[, binaryclass.target] == 0),
    sum(binaryclass.df[, binaryclass.target] == td$negative))

  # recode.target = "-1+1": same idea, negative class -> -1
  df = getTaskData(binaryclass.task, recode.target = "-1+1")
  expect_equal(df[, 1:20], binaryclass.df[, 1:20])
  expect_true(is.numeric(df[, binaryclass.target]))
  expect_equal(sum(df[, binaryclass.target] == 1),
    sum(binaryclass.df[, binaryclass.target] == td$positive))
  expect_equal(sum(df[, binaryclass.target] == -1),
    sum(binaryclass.df[, binaryclass.target] == td$negative))

  # recode.target = "multilabel.factor": every label column becomes a factor;
  # vapply() pins the per-column result to a single logical
  df = getTaskData(multilabel.task, recode.target = "multilabel.factor")
  expect_true(all(vapply(df[, multilabel.target], is.factor, logical(1L))))
  expect_true(all(df[multilabel.small.inds, multilabel.target] == data.frame(
    y1 = as.factor(c(TRUE, FALSE, TRUE, TRUE)),
    y2 = as.factor(c(FALSE, TRUE, FALSE, FALSE)))))
  # row names of the selected rows must survive the recoding
  expect_equal(rownames(df[multilabel.small.inds, multilabel.target]), c("1", "52", "53", "123"))

  # subset + features by name: 150 rows, 2 features + 1 target column
  df = getTaskData(binaryclass.task, subset = 1:150, features = colnames(binaryclass.df)[1:2])
  expect_equal(nrow(df), 150)
  expect_equal(ncol(df), 3)
  # the same shape must hold when the target is recoded as well
  df = getTaskData(binaryclass.task, subset = 1:150, features = colnames(binaryclass.df)[1:2],
    recode.target = "01")
  expect_equal(nrow(df), 150)
  expect_equal(ncol(df), 3)

  # target.extra = TRUE: features and target come back as separate elements
  x = getTaskData(multiclass.task, target.extra = TRUE)
  expect_equal(x$data[, 1:4], multiclass.df[, 1:4])
  expect_equal(x$target, multiclass.df[, multiclass.target])

  # getTaskData works with index vector
  df = getTaskData(binaryclass.task, subset = 1:150, features = 1:2)
  expect_equal(nrow(df), 150)
  expect_equal(ncol(df), 3)
})

# Survival fixture: getTaskData() must reproduce surv.df, honour
# subset/features, split features from the target columns with
# target.extra = TRUE, and yield a Surv object for recode.target = "surv".
test_that("getTaskData survival", {
  # full extraction equals the fixture data
  full = getTaskData(surv.task)
  expect_equal(full, surv.df)

  # ten rows, two named features; target columns are expected alongside
  feat.names = colnames(surv.df)[3:4]
  part = getTaskData(surv.task, subset = 1:10, features = feat.names)
  keep.cols = union(feat.names, surv.target)
  expect_equal(part, surv.df[1:10, keep.cols])

  # target.extra = TRUE returns exactly the elements "data" and "target"
  parts = getTaskData(surv.task, target.extra = TRUE)
  expect_true(setequal(names(parts), c("data", "target")))
  feat.part = parts$data
  target.part = parts$target
  expect_true(is.data.frame(feat.part))
  expect_true(is.data.frame(target.part))
  # two columns belong to the target, the rest are features
  n.obs = nrow(surv.df)
  expect_equal(dim(feat.part), c(n.obs, ncol(surv.df) - 2))
  expect_equal(dim(target.part), c(n.obs, 2L))
  expect_equal(names(target.part), surv.target)
  non.target = setdiff(names(surv.df), surv.target)
  expect_true(setequal(names(feat.part), non.target))

  # recoded target must be a Surv object of the same dimensions
  recoded = getTaskData(surv.task, target.extra = TRUE, recode.target = "surv")
  expect_true(survival::is.Surv(recoded$target))
  expect_equal(dim(recoded$target), c(nrow(surv.df), 2L))
})

# Multilabel fixture: getTaskData() must reproduce multilabel.df, honour
# subset/features, and split features from the label columns with
# target.extra = TRUE. Expected dimensions are derived from the fixture
# instead of being hard-coded, matching the survival test in this file.
test_that("getTaskData multilabel", {
  # full extraction equals the fixture data
  df = getTaskData(multilabel.task)
  expect_equal(df, multilabel.df)
  # ten rows, two named features; label columns are expected alongside
  cn = colnames(multilabel.df)[3:4]
  df = getTaskData(multilabel.task, subset = 1:10, features = cn)
  expect_equal(df, multilabel.df[1:10, union(cn, multilabel.target)])

  # expected shape, taken from the fixture itself
  n.obs = nrow(multilabel.df)
  n.targets = length(multilabel.target)

  # target.extra = TRUE returns exactly the elements "data" and "target"
  x = getTaskData(multilabel.task, target.extra = TRUE)
  expect_true(setequal(names(x), c("data", "target")))
  expect_true(is.data.frame(x$data))
  expect_true(is.data.frame(x$target))
  expect_equal(dim(x$data), c(n.obs, ncol(multilabel.df) - n.targets))
  expect_equal(dim(x$target), c(n.obs, n.targets))
  expect_equal(names(x$target), multilabel.target)
  expect_true(setequal(names(x$data), setdiff(names(multilabel.df), multilabel.target)))

  # NOTE(review): this repeats the call above with identical arguments; the
  # survival test passes a recode.target at this point - confirm whether a
  # recoding variant was intended here.
  x = getTaskData(multilabel.task, target.extra = TRUE)
  expect_equal(dim(x$target), c(n.obs, n.targets))
})