File: convertMLBenchObjToTask.R

package info (click to toggle)
r-cran-mlr 2.19.2%2Bdfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 8,264 kB
  • sloc: ansic: 65; sh: 13; makefile: 5
file content (74 lines) | stat: -rw-r--r-- 2,066 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
#' @title Convert a machine learning benchmark / demo object from package mlbench to a task.
#'
#' @description
#' Looks up `x` in package \pkg{mlbench}: if it names a bundled dataset, that
#' dataset is loaded; otherwise `x` is treated as the name of a data simulation
#' function, which is called with `n` and `...`.
#'
#' We auto-set the target column. For datasets, we also drop any column which is
#' called \dQuote{Id} and convert logicals to factors.
#'
#' @param x (`character(1)`)\cr
#'   Name of an mlbench function or dataset.
#' @param n (`integer(1)`)\cr
#'   Number of observations for data simulation functions.
#'   Ignored when `x` names a dataset.
#'   Note that for a few mlbench functions this setting is not exactly respected by mlbench.
#'   Default is 100.
#' @param ... (any)\cr
#'   Passed on to data simulation functions.
#'   Ignored when `x` names a dataset.
#' @return A classification task if the target column is a factor,
#'   otherwise a regression task.
#' @export
#' @examples
#' \dontshow{ if (requireNamespace("mlbench")) \{ }
#' print(convertMLBenchObjToTask("Ionosphere"))
#' print(convertMLBenchObjToTask("mlbench.spirals", n = 100, sd = 0.1))
#' \dontshow{ \} }
convertMLBenchObjToTask = function(x, n = 100L, ...) {

  assertString(x)
  requirePackages("mlbench")

  # names of all datasets shipped with mlbench
  datasets = data(package = "mlbench")$results[, "Item"]

  # name of the target column for each mlbench dataset
  targets = c(
    BostonHousing = "medv",
    BostonHousing2 = "medv",
    BreastCancer = "Class",
    DNA = "Class",
    Glass = "Type",
    HouseVotes84 = "Class",
    Ionosphere = "Class",
    LetterRecognition = "lettr",
    Ozone = "V4",
    PimaIndiansDiabetes = "diabetes",
    PimaIndiansDiabetes2 = "diabetes",
    Satellite = "classes",
    Servo = "Class",
    Shuttle = "Class",
    Sonar = "Class",
    Soybean = "Class",
    Vehicle = "Class",
    Vowel = "Class",
    Zoo = "type"
  )

  if (x %in% datasets) {
    # we load a data set
    if (!x %in% names(targets)) {
      stop(sprintf("No target column is known for mlbench dataset '%s'.", x),
        call. = FALSE)
    }
    # Load into a private environment so the caller's workspace is not touched.
    # 'package' is pinned so that a dataset of the same name from another
    # attached package or a local 'data' directory can never be picked up.
    ee = new.env()
    data(list = x, package = "mlbench", envir = ee)
    d = ee[[x]]
    d$Id = NULL
    target = targets[[x]]
    d = convertDfCols(d, logicals.as.factor = TRUE)
  } else {
    # we call a data simulation function; it may be exported or internal
    ns = asNamespace("mlbench")
    if (!exists(x, envir = ns, mode = "function", inherits = FALSE)) {
      stop(sprintf("'%s' is neither a dataset nor a function of package 'mlbench'.", x),
        call. = FALSE)
    }
    generator = get(x, envir = ns, mode = "function", inherits = FALSE)
    n = asCount(n)
    z = generator(n = n, ...)
    d = as.data.frame(z)
    # exact matching via [[ ]]: the result either carries a 'classes' element
    # or we fall back to 'y' as the target column
    target = if (!is.null(z[["classes"]])) "classes" else "y"
  }

  # a factor target yields a classification task, anything else a regression task
  task = if (is.factor(d[[target]])) {
    makeClassifTask(id = x, data = d, target = target)
  } else {
    makeRegrTask(id = x, data = d, target = target)
  }
  return(task)
}