File: RLearner_classif_logreg.R

package info (click to toggle)
r-cran-mlr 2.19.2%2Bdfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 8,264 kB
  • sloc: ansic: 65; sh: 13; makefile: 5
file content (45 lines) | stat: -rw-r--r-- 1,501 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
#' @export
makeRLearner.classif.logreg = function() {
  makeRLearnerClassif(
    cl = "classif.logreg",
    package = "stats",
    par.set = makeParamSet(
      makeLogicalLearnerParam("model", default = TRUE, tunable = FALSE)
    ),
    par.vals = list(
      model = FALSE
    ),
    properties = c("twoclass", "numerics", "factors", "prob", "weights"),
    name = "Logistic Regression",
    short.name = "logreg",
    note = "Delegates to `glm` with `family = binomial(link = 'logit')`. We set 'model' to FALSE by default to save memory.",
    callees = "glm"
  )
}

#' @export
trainLearner.classif.logreg = function(.learner, .task, .subset, .weights = NULL, ...) {
  f = getTaskFormula(.task)

  # logreg expects the first label to be the negative class, contrary
  # to the mlr3 convention that the positive class comes first.
  # see https://github.com/mlr-org/mlr/issues/2817
  tn = getTaskTargetNames(.task)
  data = getTaskData(.task, .subset)
  data[[tn]] = swap_levels(data[[tn]])

  stats::glm(f, data = getTaskData(.task, .subset), family = "binomial", weights = .weights, ...)
}

#' @export
predictLearner.classif.logreg = function(.learner, .model, .newdata, ...) {
  x = predict(.model$learner.model, newdata = .newdata, type = "response", ...)
  levs = .model$task.desc$class.levels
  if (.learner$predict.type == "prob") {
    propVectorToMatrix(x, levs)
  } else {
    levs = .model$task.desc$class.levels
    p = as.factor(ifelse(x > 0.5, levs[2L], levs[1L]))
    unname(p)
  }
}