#' @title Get predictions from resample results.
#'
#' @description
#' Very simple getter.
#'
#' @param res ([ResampleResult])\cr
#'   The result of [resample] run with `keep.pred = TRUE`.
#' @return ([ResamplePrediction]).
#' @export
#' @family resample
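#' @examples
#' # A minimal usage sketch; "classif.rpart" and mlr's example task
#' # sonar.task are illustrative, any learner/task pair works:
#' \dontrun{
#' rdesc = makeResampleDesc("CV", iters = 2)
#' res = resample("classif.rpart", sonar.task, rdesc, keep.pred = TRUE)
#' pred = getRRPredictions(res)
#' head(as.data.frame(pred))
#' }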
getRRPredictions = function(res) {
  if (is.null(res$pred)) {
    stopf("The 'pred' slot is empty because the ResampleResult was generated with keep.pred = FALSE.")
  } else {
    res$pred
  }
}
#' @title Get task description from resample results (DEPRECATED).
#'
#' @description
#' Get a summarizing task description.
#'
#' @param res ([ResampleResult])\cr
#'   The result of [resample].
#' @return ([TaskDesc]).
#' @export
#' @family resample
getRRTaskDescription = function(res) {
  .Deprecated("getRRTaskDesc")
  getRRTaskDesc(res)
}
#' @title Get task description from resample results.
#'
#' @description
#' Get a summarizing task description.
#'
#' @param res ([ResampleResult])\cr
#'   The result of [resample].
#' @return ([TaskDesc]).
#' @export
#' @family resample
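#' @examples
#' # Sketch: get the task description of a resample run; the learner and
#' # task are illustrative:
#' \dontrun{
#' res = resample("classif.rpart", iris.task, makeResampleDesc("Holdout"))
#' getRRTaskDesc(res)$id
#' }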
getRRTaskDesc = function(res) {
  res$task.desc
}
#' @title Get list of predictions for train and test set of each single resample iteration.
#'
#' @description
#' This function creates a list with two slots `train` and `test` where
#' each slot is again a list of [Prediction] objects for each single
#' resample iteration.
#' If `predict = "train"` was used for the resample description
#' (see [makeResampleDesc]), the slot `test` will be `NULL`;
#' if `predict = "test"` was used, the slot `train` will be `NULL`.
#'
#' @param res ([ResampleResult])\cr
#'   The result of [resample] run with `keep.pred = TRUE`.
#' @param ... (any)\cr
#'   Further options passed to [makePrediction].
#' @return [list].
#' @export
#' @family resample
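#' @examples
#' # Sketch: with predict = "both" in the resample description, both the
#' # train and the test slot are filled; learner and task are illustrative:
#' \dontrun{
#' rdesc = makeResampleDesc("CV", iters = 3, predict = "both")
#' res = resample("classif.rpart", iris.task, rdesc, keep.pred = TRUE)
#' preds = getRRPredictionList(res)
#' length(preds$train) # one Prediction object per resample iteration
#' preds$test[[1]]
#' }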
getRRPredictionList = function(res, ...) {
  assertClass(res, "ResampleResult")
  # We need to force keep.pred = TRUE (will be checked in getRRPredictions)
  pred = getRRPredictions(res)
  predict.type = pred$predict.type
  time = pred$time
  task.desc = getRRTaskDesc(res)

  # split by train and test set
  set = levels(factor(pred$data$set))

  # get prediction objects for train and test set
  prediction = lapply(set, function(s) {
    # split by resample iterations
    p.split = pred$data[pred$data$set == s, , drop = FALSE]
    p.split = split(p.split, as.factor(p.split$iter))
    # create prediction object for each resample iteration
    p.split = lapply(p.split, function(p) {
      # get predictions based on predict.type
      if (predict.type == "prob") {
        y = p[, stri_startswith_fixed(colnames(p), "prob."), drop = FALSE]
        # we need to remove the "prob." part in the colnames, otherwise
        # makePrediction thinks that the factor starts with "prob."
        colnames(y) = stri_replace_first_fixed(colnames(y), "prob.", replacement = "")
      } else if (predict.type == "se") {
        y = as.matrix(p[c("response", "se")])
      } else {
        y = p$response
      }
      makePrediction(task.desc, id = p$id,
        truth = p$truth, y = y, row.names = p$id,
        predict.type = predict.type, time = NA_real_, ...)
    })
    # add time info afterwards
    for (i in seq_along(p.split)) {
      p.split[[i]]$time = time[i]
    }
    return(p.split)
  })

  ret = setNames(prediction, set)
  if (is.null(ret$train)) ret = append(ret, list(train = NULL))
  if (is.null(ret$test)) ret = append(ret, list(test = NULL))
  return(ret[c("train", "test")])
}
#' @title Compute new measures for an existing ResampleResult.
#' @description
#' Adds new measures to an existing `ResampleResult`.
#' @param res ([ResampleResult])\cr
#'   The result of [resample] run with `keep.pred = TRUE`.
#' @template arg_measures
#' @return ([ResampleResult]).
#' @export
#' @family resample
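#' @examples
#' # Sketch: resample with one measure, then recompute further measures
#' # from the stored predictions; learner and task are illustrative:
#' \dontrun{
#' rdesc = makeResampleDesc("CV", iters = 2)
#' res = resample("classif.rpart", sonar.task, rdesc,
#'   measures = mmce, keep.pred = TRUE)
#' res = addRRMeasure(res, list(acc, ber))
#' res$measures.test
#' }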
addRRMeasure = function(res, measures) {
  assertClass(res, "ResampleResult")
  if (inherits(measures, "Measure")) measures = list(measures)

  # check which measures are missing in the ResampleResult object
  measures.id = vcapply(measures, function(x) x$id)
  missing.measures = setdiff(measures.id, colnames(res$measures.test))

  # if there are missing measures
  if (length(missing.measures) != 0) {
    # get list of prediction objects per iteration from resample result
    pred = getRRPredictionList(res)
    # recompute missing performance for train and/or test set
    set = names(pred)[!vlapply(pred, is.null)]
    perf = setNames(lapply(set, function(s) {
      as.data.frame(do.call("rbind", lapply(pred[[s]], function(p) {
        ret = performance(p, measures)
        matrix(ret, ncol = length(measures), dimnames = list(NULL, names(ret)))
      })))
    }), set)
    # add missing measures to resample result
    if (is.null(perf$train)) {
      res$measures.train[, missing.measures] = NA
    } else {
      res$measures.train = cbind(res$measures.train, perf$train[, missing.measures, drop = FALSE])
    }
    if (is.null(perf$test)) {
      res$measures.test[, missing.measures] = NA
    } else {
      res$measures.test = cbind(res$measures.test, perf$test[, missing.measures, drop = FALSE])
    }
    # recompute the aggregated performance for the newly added measures
    aggr = vnapply(measures[measures.id %in% missing.measures], function(m) {
      m$aggr$fun(task = NULL,
        perf.test = res$measures.test[, m$id],
        perf.train = res$measures.train[, m$id],
        measure = m,
        pred = getRRPredictions(res),
        group = res$pred$instance$group)
    })
    names(aggr) = vcapply(measures[measures.id %in% missing.measures], measureAggrName)
    res$aggr = c(res$aggr, aggr)
  }
  return(res)
}
#' @title Return the error dump of ResampleResult.
#'
#' @description
#' Returns the error dumps generated during resampling, which can be used with `debugger()`
#' to debug errors. These dumps are saved if [configureMlr] configuration `on.error.dump`,
#' or the corresponding learner `config`, is `TRUE`.
#'
#' The returned object is a list with as many entries as the resampling being used has iterations. Each of these
#' entries can have a subset of the following slots, depending on which step in the resampling iteration failed:
#' \dQuote{train} (error during training step), \dQuote{predict.train} (prediction on training subset),
#' \dQuote{predict.test} (prediction on test subset).
#'
#' @param res ([ResampleResult])\cr
#'   The result of [resample].
#' @return [list].
#' @family debug
#' @export
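#' @examples
#' # Sketch: error dumps are only stored when on.error.dump is enabled;
#' # the learner/task pair is illustrative and may not actually fail:
#' \dontrun{
#' configureMlr(on.learner.error = "warn", on.error.dump = TRUE)
#' res = resample("classif.lda", iris.task, makeResampleDesc("CV", iters = 2))
#' dumps = getRRDump(res)
#' # inspect the dump of the first failing training step, if any
#' if (length(dumps) > 0 && !is.null(dumps[[1]]$train)) debugger(dumps[[1]]$train)
#' }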
getRRDump = function(res) {
  return(res$err.dumps)
}