File: generateFeatureImportanceData.Rd

% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/generateFeatureImportance.R
\name{generateFeatureImportanceData}
\alias{generateFeatureImportanceData}
\alias{FeatureImportanceData}
\title{Generate feature importance.}
\usage{
generateFeatureImportanceData(
  task,
  method = "permutation.importance",
  learner,
  features = getTaskFeatureNames(task),
  interaction = FALSE,
  measure,
  contrast = function(x, y) x - y,
  aggregation = mean,
  nmc = 50L,
  replace = TRUE,
  local = FALSE,
  show.info = FALSE
)
}
\arguments{
\item{task}{(\link{Task})\cr
The task.}

\item{method}{(\code{character(1)})\cr
The method used to compute the feature importance.
The only method available is \dQuote{permutation.importance}.
Default is \dQuote{permutation.importance}.}

\item{learner}{(\link{Learner} | \code{character(1)})\cr
The learner.
If you pass a string the learner will be created via \link{makeLearner}.}

\item{features}{(\link{character})\cr
The features to compute the importance of.
The default is all of the features contained in the \link{Task}.}

\item{interaction}{(\code{logical(1)})\cr
Whether to compute the importance of the \code{features} argument jointly.
For \code{method = "permutation.importance"} this entails permuting the values of
all \code{features} together and then contrasting that performance with the
performance obtained without permutation.
The default is \code{FALSE}.}

\item{measure}{(\link{Measure})\cr
Performance measure.
Default is the default measure for the task.}

\item{contrast}{(\code{function})\cr
A function that takes two numeric vectors (the performances being contrasted) and
returns a numeric vector of the same length.
The default is the element-wise difference between the vectors, \code{function(x, y) x - y}.}

\item{aggregation}{(\code{function})\cr
A function which aggregates the differences.
This function must take a numeric vector and return a numeric vector of length 1.
The default is \code{mean}.}

\item{nmc}{(\code{integer(1)})\cr
The number of Monte-Carlo iterations to use in computing the feature importance.
If \code{nmc == -1} and \code{method = "permutation.importance"} then all
permutations of the \code{features} are used.
The default is 50.}

\item{replace}{(\code{logical(1)})\cr
Whether to sample the feature values with replacement.
The default is \code{TRUE}.}

\item{local}{(\code{logical(1)})\cr
Whether to compute the per-observation importance.
The default is \code{FALSE}.}

\item{show.info}{(\code{logical(1)})\cr
Whether progress output (feature name, time elapsed) should be displayed.
The default is \code{FALSE}.}
}
\value{
(\code{FeatureImportance}). A named list which contains the computed feature importance and the input arguments.

Object members:
\item{res}{(\link{data.frame})\cr
Has a column for each feature, or combination of features (colon separated), for which the importance is computed.
Each row corresponds to the importance of the feature(s) named in the column for predicting the target.
}
\item{interaction}{(\code{logical(1)})\cr
Whether or not the importance of the \code{features} was computed jointly rather than individually.
}
\item{measure}{(\link{Measure})\cr
The measure used to compute performance.
}
\item{contrast}{(\code{function})\cr
The function used to compare the performance of predictions.
}
\item{aggregation}{(\code{function})\cr
The function used to aggregate the performance contrasts across the Monte-Carlo iterations.
}
\item{replace}{(\code{logical(1)})\cr
Whether or not, when \code{method = "permutation.importance"}, the feature values
are sampled with replacement.
}
\item{nmc}{(\code{integer(1)})\cr
The number of Monte-Carlo iterations used to compute the feature importance.
When \code{nmc == -1} and \code{method = "permutation.importance"} all permutations are used.
}
\item{local}{(\code{logical(1)})\cr
Whether observation-specific importance is computed for the \code{features}.
}
}
\description{
Estimate how important individual features or groups of features are by contrasting prediction performances. For method \dQuote{permutation.importance}, the performance obtained after permuting the values of a feature (or a group of features) is compared to the performance on the unpermuted data.
}
\examples{
\dontshow{ if (requireNamespace("rpart")) \{ }

lrn = makeLearner("classif.rpart", predict.type = "prob")
fit = train(lrn, iris.task)
imp = generateFeatureImportanceData(iris.task, "permutation.importance",
  lrn, "Petal.Width", nmc = 10L, local = TRUE)
\dontshow{ \} }
}
\references{
Jerome Friedman; Greedy Function Approximation: A Gradient Boosting Machine, Annals of Statistics, Vol. 29, No. 5 (Oct., 2001), pp. 1189-1232.
}
\seealso{
Other generate_plot_data: 
\code{\link{generateCalibrationData}()},
\code{\link{generateCritDifferencesData}()},
\code{\link{generateFilterValuesData}()},
\code{\link{generateLearningCurveData}()},
\code{\link{generatePartialDependenceData}()},
\code{\link{generateThreshVsPerfData}()},
\code{\link{plotFilterValues}()}
}
\concept{generate_plot_data}