File: generateFeatureImportanceData.Rd

% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/generateFeatureImportance.R
\name{generateFeatureImportanceData}
\alias{generateFeatureImportanceData}
\alias{FeatureImportanceData}
\title{Generate feature importance.}
\usage{
generateFeatureImportanceData(
  task,
  method = "permutation.importance",
  learner,
  features = getTaskFeatureNames(task),
  interaction = FALSE,
  measure,
  contrast = function(x, y) x - y,
  aggregation = mean,
  nmc = 50L,
  replace = TRUE,
  local = FALSE,
  show.info = FALSE
)
}
\arguments{
\item{task}{(\link{Task})\cr
The task.}

\item{method}{(\code{character(1)})\cr
The method used to compute the feature importance.
The only method available is \dQuote{permutation.importance}.
Default is \dQuote{permutation.importance}.}

\item{learner}{(\link{Learner} | \code{character(1)})\cr
The learner.
If you pass a string the learner will be created via \link{makeLearner}.}

\item{features}{(\link{character})\cr
The features to compute the importance of.
The default is all of the features contained in the \link{Task}.}

\item{interaction}{(\code{logical(1)})\cr
Whether to compute the importance of the \code{features} argument jointly.
For \code{method = "permutation.importance"} this entails permuting the values of
all \code{features} together and then contrasting that performance with the
performance obtained without permutation.
The default is \code{FALSE}.}

\item{measure}{(\link{Measure})\cr
Performance measure.
Default is the default measure for the task.}

\item{contrast}{(\code{function})\cr
A function that takes two numeric vectors (the performances being contrasted) and
returns a numeric vector of the same length.
The default is the element-wise difference between the vectors, \code{function(x, y) x - y}.}

\item{aggregation}{(\code{function})\cr
A function which aggregates the differences.
This function must take a numeric vector and return a numeric vector of length 1.
The default is \code{mean}.}

\item{nmc}{(\code{integer(1)})\cr
The number of Monte-Carlo iterations to use in computing the feature importance.
If \code{nmc == -1} and \code{method = "permutation.importance"} then all
permutations of the \code{features} are used.
The default is 50.}

\item{replace}{(\code{logical(1)})\cr
Whether to sample the feature values with replacement.
The default is \code{TRUE}.}

\item{local}{(\code{logical(1)})\cr
Whether to compute the per-observation importance.
The default is \code{FALSE}.}

\item{show.info}{(\code{logical(1)})\cr
Whether progress output (feature name, time elapsed) should be displayed.
The default is \code{FALSE}.}
}
\value{
(\code{FeatureImportance}). A named list which contains the computed feature importance and the input arguments.

Object members:
\item{res}{(\link{data.frame})\cr
Has a column for each feature, or combination of features (colon separated), for which the importance is computed.
Each row corresponds to the importance of the feature(s) named in the column for predicting the target.
}
\item{interaction}{(\code{logical(1)})\cr
Whether or not the importance of the \code{features} was computed jointly rather than individually.
}
\item{measure}{(\link{Measure})\cr
The measure used to compute performance.
}
\item{contrast}{(\code{function})\cr
The function used to compare the performance of predictions.
}
\item{aggregation}{(\code{function})\cr
The function used to aggregate the performance contrasts across the Monte-Carlo iterations.
}
\item{replace}{(\code{logical(1)})\cr
Whether or not, when \code{method = "permutation.importance"}, the feature values
are sampled with replacement.
}
\item{nmc}{(\code{integer(1)})\cr
The number of Monte-Carlo iterations used to compute the feature importance.
When \code{nmc == -1} and \code{method = "permutation.importance"} all permutations are used.
}
\item{local}{(\code{logical(1)})\cr
Whether observation-specific importance is computed for the \code{features}.
}
}
\description{
Estimate how important individual features or groups of features are by contrasting prediction performances. For method \dQuote{permutation.importance}, the performance obtained after permuting the values of a feature (or a group of features) is compared to the performance on the unpermuted data.
}
\examples{
\dontshow{ if (requireNamespace("rpart")) \{ }

lrn = makeLearner("classif.rpart", predict.type = "prob")
fit = train(lrn, iris.task)
imp = generateFeatureImportanceData(iris.task, "permutation.importance",
  lrn, "Petal.Width", nmc = 10L, local = TRUE)
\dontshow{ \} }
}
\references{
Jerome Friedman; Greedy Function Approximation: A Gradient Boosting Machine, Annals of Statistics, Vol. 29, No. 5 (Oct., 2001), pp. 1189-1232.
}
\seealso{
Other generate_plot_data: 
\code{\link{generateCalibrationData}()},
\code{\link{generateCritDifferencesData}()},
\code{\link{generateFilterValuesData}()},
\code{\link{generateLearningCurveData}()},
\code{\link{generatePartialDependenceData}()},
\code{\link{generateThreshVsPerfData}()},
\code{\link{plotFilterValues}()}
}
\concept{generate_plot_data}