File: tuneParams.R

package info (click to toggle)
r-cran-mlr 2.19.2%2Bdfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 8,264 kB
  • sloc: ansic: 65; sh: 13; makefile: 5
file content (166 lines) | stat: -rw-r--r-- 7,303 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
# FIXME: check whether optimization can be parallelized if requested by the user

#' @title Hyperparameter tuning.
#'
#' @description
#' Optimizes the hyperparameters of a learner.
#' Allows for different optimization methods, such as grid search, evolutionary strategies,
#' iterated F-race, etc. You can select such an algorithm (and its settings)
#' by passing a corresponding control object. For a complete list of implemented algorithms look at
#' [TuneControl].
#'
#' Multi-criteria tuning can be done with [tuneParamsMultiCrit].
#'
#' @template arg_learner
#' @template arg_task
#' @param resampling ([ResampleInstance] | [ResampleDesc])\cr
#'   Resampling strategy to evaluate points in hyperparameter space. If you pass a description,
#'   it is instantiated once at the beginning by default, so all points are
#'   evaluated on the same training/test sets.
#'   If you want to change that behavior, look at [TuneControl].
#' @template arg_measures_opt
#' @param par.set ([ParamHelpers::ParamSet])\cr
#'   Collection of parameters and their constraints for optimization.
#'   Dependent parameters with a `requires` field must use `quote` and not
#'   `expression` to define it.
#' @param control ([TuneControl])\cr
#'   Control object for search method. Also selects the optimization algorithm for tuning.
#' @template arg_showinfo
#' @param resample.fun ([closure])\cr
#'   The function to use for resampling. Defaults to [resample]. If a user-given function
#'   is to be used instead, it should take the arguments \dQuote{learner}, \dQuote{task}, \dQuote{resampling},
#'   \dQuote{measures}, and \dQuote{show.info}; see [resample]. Within this function,
#'   it is easiest to call [resample] and possibly modify the result.
#'   However, it is possible to return a list with only the following essential slots:
#'   the \dQuote{aggr} slot for general tuning, additionally the \dQuote{pred} slot if threshold tuning is performed
#'   (see [TuneControl]), and the \dQuote{err.msgs} and \dQuote{err.dumps} slots for error reporting.
#'   This parameter must be the default when `mbo` tuning is performed.
#' @return ([TuneResult]).
#' @family tune
#' @note If you would like to include results from the training data set, make
#' sure to appropriately adjust the resampling strategy and the aggregation for
#' the measure. See example code below.
#' @export
#' @examples
#' \dontshow{ if (requireNamespace("kernlab")) \{ }
#' \dontshow{ if (requireNamespace("kernlab")) \{ }
#' \dontshow{ if (requireNamespace("irace")) \{ }
#' set.seed(123)
#' # a grid search for an SVM (with a tiny number of points...)
#' # note how easily we can optimize on a log-scale
#' ps = makeParamSet(
#'   makeNumericParam("C", lower = -12, upper = 12, trafo = function(x) 2^x),
#'   makeNumericParam("sigma", lower = -12, upper = 12, trafo = function(x) 2^x)
#' )
#' ctrl = makeTuneControlGrid(resolution = 2L)
#' rdesc = makeResampleDesc("CV", iters = 2L)
#' res = tuneParams("classif.ksvm", iris.task, rdesc, par.set = ps, control = ctrl)
#' print(res)
#' # access data for all evaluated points
#' df = as.data.frame(res$opt.path)
#' df1 = as.data.frame(res$opt.path, trafo = TRUE)
#' print(head(df[, -ncol(df)]))
#' print(head(df1[, -ncol(df)]))
#' # access data for all evaluated points - alternative
#' df2 = generateHyperParsEffectData(res)
#' df3 = generateHyperParsEffectData(res, trafo = TRUE)
#' print(head(df2$data[, -ncol(df2$data)]))
#' print(head(df3$data[, -ncol(df3$data)]))
#' \dontrun{
#' # we optimize the SVM over 3 kernels simultaneously
#' # note how we use dependent params (requires = ...) and iterated F-racing here
#' ps = makeParamSet(
#'   makeNumericParam("C", lower = -12, upper = 12, trafo = function(x) 2^x),
#'   makeDiscreteParam("kernel", values = c("vanilladot", "polydot", "rbfdot")),
#'   makeNumericParam("sigma", lower = -12, upper = 12, trafo = function(x) 2^x,
#'     requires = quote(kernel == "rbfdot")),
#'   makeIntegerParam("degree", lower = 2L, upper = 5L,
#'     requires = quote(kernel == "polydot"))
#' )
#' print(ps)
#' ctrl = makeTuneControlIrace(maxExperiments = 5, nbIterations = 1, minNbSurvival = 1)
#' rdesc = makeResampleDesc("Holdout")
#' res = tuneParams("classif.ksvm", iris.task, rdesc, par.set = ps, control = ctrl)
#' print(res)
#' df = as.data.frame(res$opt.path)
#' print(head(df[, -ncol(df)]))
#'
#' # include the training set performance as well
#' rdesc = makeResampleDesc("Holdout", predict = "both")
#' res = tuneParams("classif.ksvm", iris.task, rdesc, par.set = ps,
#'   control = ctrl, measures = list(mmce, setAggregation(mmce, train.mean)))
#' print(res)
#' df2 = as.data.frame(res$opt.path)
#' print(head(df2[, -ncol(df2)]))
#' }
#' \dontshow{ \} }
#' \dontshow{ \} }
#' \dontshow{ \} }
#' @seealso [generateHyperParsEffectData]
tuneParams = function(learner, task, resampling, measures, par.set, control,
  show.info = getMlrOption("show.info"), resample.fun = resample) {

  # Argument validation. The checks run in a fixed order so that the first
  # offending argument is the one reported to the caller.
  learner = checkLearner(learner)
  assertClass(task, classes = "Task")
  measures = checkMeasures(measures, learner)
  assertClass(par.set, classes = "ParamSet")
  assertClass(control, classes = "TuneControl")
  assertFunction(resample.fun)

  is.desc = inherits(resampling, "ResampleDesc")
  if (!(is.desc || inherits(resampling, "ResampleInstance"))) {
    stop("Argument resampling must be of class ResampleDesc or ResampleInstance!")
  }
  # A description is turned into a concrete instance up front when the control
  # object requests the same resampling instance for all evaluations.
  if (is.desc && control$same.resampling.instance) {
    resampling = makeResampleInstance(resampling, task = task)
  }

  assertFlag(show.info)
  checkTunerParset(learner, par.set, measures, control)
  control = setDefaultImputeVal(control, measures)

  # Dispatch on the primary class of the control object to pick the tuner.
  ctrl.class = getClass1(control)
  tuner = switch(ctrl.class,
    TuneControlRandom = tuneRandom,
    TuneControlGrid = tuneGrid,
    TuneControlDesign = tuneDesign,
    TuneControlCMAES = tuneCMAES,
    TuneControlGenSA = tuneGenSA,
    TuneControlMBO = tuneMBO,
    TuneControlIrace = tuneIrace,
    stopf("Tuning algorithm for '%s' does not exist!", ctrl.class)
  )

  # The opt path is created with extras when thresholds are tuned or when the
  # "on.error.dump" option is switched on.
  with.extra = control$tune.threshold || getMlrOption("on.error.dump")
  opt.path = makeOptPathDFFromMeasures(par.set, measures, include.extra = with.extra)

  if (show.info) {
    messagef("[Tune] Started tuning learner %s for parameter set:", learner$id)
    # message() instead of messagef(): the printed parameter set can exceed
    # the character limit of messagef(), see issue #1528
    message(printToChar(par.set))
    messagef("With control class: %s", ctrl.class)
    messagef("Imputation value: %g", control$impute.val)
  }

  result = tuner(learner, task, resampling, measures, par.set, control,
    opt.path, show.info, resample.fun)

  if (show.info) {
    messagef("[Tune] Result: %s : %s", paramValueToString(par.set, result$x), perfsToString(result$y))
  }
  result
}


#' @title Get the optimization path of a tuning result.
#'
#' @description
#' Returns the opt.path from a ([TuneResult]) object.
#' @param tune.result ([TuneResult]) \cr
#'   A tuning result of the ([tuneParams]) function.
#' @param as.df (`logical(1)`)\cr
#'   Should the optimization path be returned as a data frame?
#'   Must be a single, non-missing `TRUE` or `FALSE`; anything else is an error.
#'   Default is `TRUE`.
#' @return ([ParamHelpers::OptPath]) or ([data.frame]).
#' @export
getTuneResultOptPath = function(tune.result, as.df = TRUE) {
  # Validate the flag the same way tuneParams() validates show.info. Comparing
  # with `== TRUE` let values such as "TRUE" or 1 through by coercion and
  # produced a cryptic error for NA.
  assertFlag(as.df)

  opt.path = tune.result$opt.path
  if (as.df) {
    return(as.data.frame(opt.path))
  }
  opt.path
}