#' @title Generate feature importance.
#'
#' @description
#' Estimate how important individual features or groups of features are by contrasting prediction performances. For method \dQuote{permutation.importance}, the values of a feature (or a group of features) are permuted and the resulting performance is contrasted with the performance obtained on the unpermuted data.
#'
#' @family generate_plot_data
#' @aliases FeatureImportanceData
#'
#' @template arg_task
#' @param method (`character(1)`)\cr
#' The method used to compute the feature importance.
#' The only method available is \dQuote{permutation.importance}.
#' Default is \dQuote{permutation.importance}.
#' @template arg_learner
#' @param features ([character])\cr
#' The features to compute the importance of.
#' The default is all of the features contained in the [Task].
#' @param interaction (`logical(1)`)\cr
#' Whether to compute the importance of the `features` argument jointly.
#' For `method = "permutation.importance"` this entails permuting the values of
#' all `features` together and contrasting the resulting performance with the
#' performance on the unpermuted data.
#' The default is `FALSE`.
#' @template arg_measure
#' @param contrast (`function`)\cr
#' A function that takes two numeric vectors, the performance on the permuted
#' data and the performance on the unpermuted data, and returns a numeric
#' vector of the same length.
#' The default is the element-wise difference between the two vectors.
#' @param aggregation (`function`)\cr
#' A function which aggregates the differences.
#' This function must take a numeric vector and return a numeric vector of length 1.
#' The default is `mean`.
#' @param nmc (`integer(1)`)\cr
#' The number of Monte-Carlo iterations to use in computing the feature importance.
#' If `nmc == -1` and `method = "permutation.importance"` then all
#' permutations of the `features` are used.
#' The default is 50.
#' @param replace (`logical(1)`)\cr
#' Whether to sample the feature values with replacement (`TRUE`) or without
#' replacement (`FALSE`).
#' The default is `TRUE`.
#' @param local (`logical(1)`)\cr
#' Whether to compute the per-observation importance.
#' The default is `FALSE`.
#' @param show.info (`logical(1)`)\cr
#' Whether progress output (feature name, time elapsed) should be displayed.
#' The default is `FALSE`.
#'
#' @return (`FeatureImportance`). A named list which contains the computed feature importance and the input arguments.
#'
#' Object members:
#' \item{res}{([data.frame])\cr
#' Has a column for each feature or combination of features (colon separated) for which the importance is computed.
#' Each row corresponds to the importance of the feature(s) named in the column; with `local = TRUE` there is one row per observation.
#' }
#' \item{interaction}{(`logical(1)`)\cr
#' Whether or not the importance of the `features` was computed jointly rather than individually.
#' }
#' \item{measure}{([Measure])\cr
#' The measure used to compute performance.
#' }
#' \item{contrast}{(`function`)\cr
#' The function used to compare the performance of predictions.
#' }
#' \item{aggregation}{(`function`)\cr
#' The function which is used to aggregate the contrast between the performance of predictions across Monte-Carlo iterations.
#' }
#' \item{replace}{(`logical(1)`)\cr
#' Whether or not, when `method = "permutation.importance"`, the feature values
#' are sampled with replacement.
#' }
#' \item{nmc}{(`integer(1)`)\cr
#' The number of Monte-Carlo iterations used to compute the feature importance.
#' When `nmc == -1` and `method = "permutation.importance"` all permutations are used.
#' }
#' \item{local}{(`logical(1)`)\cr
#' Whether observation-specific importance is computed for the `features`.
#' }
#'
#' @examples
#' \dontshow{ if (requireNamespace("rpart")) \{ }
#'
#' lrn = makeLearner("classif.rpart", predict.type = "prob")
#' fit = train(lrn, iris.task)
#' imp = generateFeatureImportanceData(iris.task, "permutation.importance",
#'   lrn, "Petal.Width", nmc = 10L, local = TRUE)
#' \dontshow{ \} }
#' @references Jerome Friedman (2001). Greedy Function Approximation: A Gradient Boosting Machine. The Annals of Statistics, 29(5), 1189-1232.
#' @export
generateFeatureImportanceData = function(task, method = "permutation.importance",
  learner, features = getTaskFeatureNames(task), interaction = FALSE, measure,
  contrast = function(x, y) x - y, aggregation = mean, nmc = 50L, replace = TRUE,
  local = FALSE, show.info = FALSE) {

  learner = checkLearner(learner)
  measure = checkMeasures(measure, learner)
  if (length(measure) > 1L) {
    stop("only one measure is allowed.")
  }
  if (getTaskType(task) != learner$type) {
    stopf("Expected task of type '%s', not '%s'", getTaskType(task), learner$type)
  }
  assertInt(nmc, lower = -1L)
  test.contrast = contrast(1, 1)
  if (!is.numeric(test.contrast)) {
    stop("the contrast function must return a numeric vector.")
  }
  if (length(test.contrast) != 1L) {
    stop("the contrast function must return a numeric vector of the same length as its inputs.")
  }
  test.aggregation = aggregation(1:2)
  if (!is.numeric(test.aggregation)) {
    stop("the aggregation function must return a numeric vector.")
  }
  if (length(test.aggregation) != 1L) {
    stop("the aggregation function must return a numeric vector of length 1.")
  }

  out = switch(method,
    "permutation.importance" = doPermutationImportance(
      task, learner, features, interaction, measure, contrast, aggregation,
      nmc, replace, local, show.info)
  )

  makeS3Obj(
    "FeatureImportance",
    res = out,
    task.desc = getTaskDesc(task),
    interaction = interaction,
    learner = learner,
    measure = measure,
    contrast = contrast,
    aggregation = aggregation,
    nmc = nmc,
    replace = replace,
    local = local
  )
}

doPermutationImportance = function(task, learner, features, interaction, measure,
  contrast, aggregation, nmc, replace, local, show.info) {

  ## train learner to get baseline model
  fit = train(learner, task)
  ## compute performance on the unpermuted data
  pred = predict(fit, task = task)
  if (local) {
    ## subset the prediction data element to compute the per-observation performance
    perf = vnapply(1:getTaskSize(task), function(i) {
      pred$data = pred$data[i, ]
      performance(pred, measure)
    })
    perf = as.numeric(perf)
  } else {
    perf = performance(pred, measure)
  }
  data = getTaskData(task)

  ## indices for resampled data to be used for permuting features
  if (nmc == -1L) {
    ## from http://stackoverflow.com/questions/11095992/generating-all-distinct-permutations-of-a-list-in-r
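    ## the helper below recursively builds an n x n! matrix whose columns are all
    ## permutations of 1:n (only feasible for very small tasks)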
    permutations = function(n) {
      if (n == 1L) {
        return(matrix(1L))
      } else {
        sp = permutations(n - 1L)
        p = ncol(sp)
        A = matrix(nrow = n, ncol = n * p)
        for (i in 1:n) {
          A[, (i - 1) * p + 1:p] = rbind(i, sp + (sp >= i))
        }
        return(A)
      }
    }
    indices = permutations(getTaskSize(task))
  } else {
    indices = replicate(nmc, sample.int(getTaskSize(task), replace = replace))
  }
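
  ## each column of indices is one resampled (nmc > 0) or fully permuted (nmc == -1)
  ## ordering of the task rows, used below to shuffle the feature values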
  args = list(measure = measure, contrast = contrast, data = data,
    perf = perf, fit = fit, indices = indices)
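
  ## permutes the feature(s) x according to the i-th column of indices, predicts on
  ## the permuted data, and contrasts the resulting performance with the baseline perf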
  doPermutationImportanceIteration = function(perf, fit, data, measure,
    contrast, indices, i, x, progress) {
    data[, x] = data[indices[, i], x]
    if (local) {
      perf.permuted = lapply(seq_len(getTaskSize(task)), function(i, pred) {
        pred$data = pred$data[i, ]
        performance(pred, measure)
      }, pred = predict(fit, newdata = data))
      perf.permuted = as.numeric(perf.permuted)
    } else {
      perf.permuted = performance(predict(fit, newdata = data), measure)
    }
    contrast(perf.permuted, perf)
  }
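
  ## with interaction = TRUE all features are permuted jointly, giving a single
  ## (colon-separated) column; otherwise each feature is permuted and aggregated separately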
  if (interaction) {
    args$x = features
    out = parallelMap(doPermutationImportanceIteration, i = seq_len(ncol(indices)), more.args = args)
    out = do.call("rbind", out)
    out = as.matrix(apply(out, 2, aggregation))
    out = as.data.frame(out)
    colnames(out) = stri_paste(features, collapse = ":")
  } else {
    if (isTRUE(show.info)) {
      time = Sys.time()
    }
    out = lapply(features, function(x) {
      if (isTRUE(show.info)) {
        cat(sprintf("Feature: '%s' [%s/%s, %s min]\n", x, match(x, features),
          length(features), round(difftime(Sys.time(), time, units = "mins"), 2)))
      }
      parallelMap(doPermutationImportanceIteration, i = seq_len(ncol(indices)), more.args = c(args, x = x))
    })
    out = lapply(out, function(x) apply(do.call("rbind", x), 2, aggregation))
    out = t(do.call("rbind", out))
    out = as.data.frame(out)
    colnames(out) = features
  }
  out
}

#' @export
print.FeatureImportance = function(x, ...) {
  catf("FeatureImportance:")
  catf("Task: %s", x$task.desc$id)
  catf("Interaction: %s", x$interaction)
  catf("Learner: %s", x$learner$id)
  catf("Measure: %s", ifelse(!is.na(x$measure), x$measure[[1]]$id, NA))
  catf("Contrast: %s", stri_paste(format(x$contrast), collapse = " "))
  catf("Aggregation: %s", stri_paste(format(x$aggregation), collapse = " "))
  catf("Replace: %s", x$replace)
  catf("Number of Monte-Carlo iterations: %s", x$nmc)
  catf("Local: %s", x$local)
  print(head(x$res))
}