File: FeatSelControl.Rd

package info (click to toggle)
r-cran-mlr 2.19.2%2Bdfsg-1
links: PTS, VCS
area: main
in suites: forky, sid, trixie
size: 8,264 kB
sloc: ansic: 65; sh: 13; makefile: 5
file content (207 lines) | stat: -rw-r--r-- 8,696 bytes
parent folder | download | duplicates (2)
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/FeatSelControl.R, R/FeatSelControlExhaustive.R,
%   R/FeatSelControlGA.R, R/FeatSelControlRandom.R, R/FeatSelControlSequential.R
\name{FeatSelControl}
\alias{FeatSelControl}
\alias{FeatSelControlExhaustive}
\alias{FeatSelControlRandom}
\alias{FeatSelControlSequential}
\alias{FeatSelControlGA}
\alias{makeFeatSelControlExhaustive}
\alias{makeFeatSelControlGA}
\alias{makeFeatSelControlRandom}
\alias{makeFeatSelControlSequential}
\title{Create control structures for feature selection.}
\usage{
makeFeatSelControlExhaustive(
  same.resampling.instance = TRUE,
  maxit = NA_integer_,
  max.features = NA_integer_,
  tune.threshold = FALSE,
  tune.threshold.args = list(),
  log.fun = "default"
)

makeFeatSelControlGA(
  same.resampling.instance = TRUE,
  impute.val = NULL,
  maxit = NA_integer_,
  max.features = NA_integer_,
  comma = FALSE,
  mu = 10L,
  lambda,
  crossover.rate = 0.5,
  mutation.rate = 0.05,
  tune.threshold = FALSE,
  tune.threshold.args = list(),
  log.fun = "default"
)

makeFeatSelControlRandom(
  same.resampling.instance = TRUE,
  maxit = 100L,
  max.features = NA_integer_,
  prob = 0.5,
  tune.threshold = FALSE,
  tune.threshold.args = list(),
  log.fun = "default"
)

makeFeatSelControlSequential(
  same.resampling.instance = TRUE,
  impute.val = NULL,
  method,
  alpha = 0.01,
  beta = -0.001,
  maxit = NA_integer_,
  max.features = NA_integer_,
  tune.threshold = FALSE,
  tune.threshold.args = list(),
  log.fun = "default"
)
}
\arguments{
\item{same.resampling.instance}{(\code{logical(1)})\cr
Should the same resampling instance be used for all evaluations to reduce variance?
Default is \code{TRUE}.}

\item{maxit}{(\code{integer(1)})\cr
Maximal number of iterations. Note that this is usually not equal to the number
of function evaluations.}

\item{max.features}{(\code{integer(1)})\cr
Maximal number of features.}

\item{tune.threshold}{(\code{logical(1)})\cr
Should the threshold be tuned for the measure at hand, after each feature set evaluation,
via \link{tuneThreshold}?
Only works for classification if the predict type is \dQuote{prob}.
Default is \code{FALSE}.}

\item{tune.threshold.args}{(\link{list})\cr
Further arguments for threshold tuning that are passed down to \link{tuneThreshold}.
Default is none.}

\item{log.fun}{(\code{function} | \code{character(1)})\cr
Function used for logging. If set to \dQuote{default} (the default), the evaluated design points, the resulting
performances, and the runtime will be reported.
If set to \dQuote{memory} the memory usage for each evaluation will also be displayed, with a small increase
in run time.
Otherwise a function with arguments \code{learner}, \code{resampling}, \code{measures},
\code{par.set}, \code{control}, \code{opt.path}, \code{dob}, \code{x}, \code{y}, \code{remove.nas},
\code{stage} and \code{prev.stage} is expected.
The default displays the performance measures, the time needed for evaluating,
the currently used memory and the max memory ever used before
(the latter two both taken from \link{gc}).
See the implementation for details.}

\item{impute.val}{(\link{numeric})\cr
If something goes wrong during optimization (e.g. the learner crashes),
this value is fed back to the tuner, so the tuning algorithm does not abort.
Imputation is only active if \code{on.learner.error} is configured not to stop in \link{configureMlr}.
It is not stored in the optimization path; an NA and a corresponding error message are
logged instead.
Note that this value is later multiplied by -1 for maximization measures internally, so you
need to enter a larger positive value for maximization here as well.
Default is the worst obtainable value of the performance measure you optimize for when
you aggregate by mean value, or \code{Inf} instead.
For multi-criteria optimization pass a vector of imputation values, one for each of your measures,
in the same order as your measures.}

\item{comma}{(\code{logical(1)})\cr
Parameter of the GA feature selection, indicating whether to use a (\code{mu}, \code{lambda})
or (\code{mu} + \code{lambda}) GA. The default is \code{FALSE}.}

\item{mu}{(\code{integer(1)})\cr
Parameter of the GA feature selection. Size of the parent population.}

\item{lambda}{(\code{integer(1)})\cr
Parameter of the GA feature selection. Size of the children population (should be smaller
than or equal to \code{mu}).}

\item{crossover.rate}{(\code{numeric(1)})\cr
Parameter of the GA feature selection. Probability of choosing a bit from the first parent
within the crossover mutation.}

\item{mutation.rate}{(\code{numeric(1)})\cr
Parameter of the GA feature selection. Probability of flipping a feature bit, i.e. switch
between selecting / deselecting a feature.}

\item{prob}{(\code{numeric(1)})\cr
Parameter of the random feature selection. Probability of choosing a feature.}

\item{method}{(\code{character(1)})\cr
Parameter of the sequential feature selection. A character representing the method. Possible
values are \code{sfs} (forward search), \code{sbs} (backward search), \code{sffs}
(floating forward search) and \code{sfbs} (floating backward search).}

\item{alpha}{(\code{numeric(1)})\cr
Parameter of the sequential feature selection.
Minimal required value of improvement difference for a forward / adding step.
Default is 0.01.}

\item{beta}{(\code{numeric(1)})\cr
Parameter of the sequential feature selection.
Minimal required value of improvement difference for a backward / removing step.
Negative values imply that you allow a slight decrease for the removal of a feature.
Default is -0.001.}
}
\value{
(\link{FeatSelControl}). The specific subclass is one of
\link{FeatSelControlExhaustive}, \link{FeatSelControlRandom},
\link{FeatSelControlSequential}, \link{FeatSelControlGA}.
}
\description{
Feature selection method used by \link{selectFeatures}.\cr
The methods used here follow a wrapper approach, described in
Kohavi and John (1997) (see references).

The following optimization algorithms are available:
\describe{
\item{FeatSelControlExhaustive}{Exhaustive search. All feature sets (up to a certain number
of features \code{max.features}) are searched.}
\item{FeatSelControlRandom}{Random search. Feature vectors are randomly drawn,
up to a certain number of features \code{max.features}.
A feature is included in the current set with probability \code{prob}.
So we are basically drawing (0,1)-membership-vectors, where each element
is Bernoulli(\code{prob}) distributed.}
\item{FeatSelControlSequential}{Deterministic forward or backward search. That means extending
(forward) or shrinking (backward) a feature set.
Depending on the given \code{method} different approaches are taken.\cr
\code{sfs} Sequential Forward Search: Starting from an empty model, in each step the feature increasing
the performance measure the most is added to the model.\cr
\code{sbs} Sequential Backward Search: Starting from a model with all features, in each step the feature
decreasing the performance measure the least is removed from the model.\cr
\code{sffs} Sequential Floating Forward Search: Starting from an empty model, in each step the algorithm
chooses the best model from all models with one additional feature and from all models with one
feature less.\cr
\code{sfbs} Sequential Floating Backward Search: Similar to \code{sffs} but starting with a full model.}
\item{FeatSelControlGA}{Search via genetic algorithm.
The GA is a simple (\code{mu}, \code{lambda}) or (\code{mu} + \code{lambda}) algorithm,
depending on the \code{comma} setting.
A comma strategy selects a new population of size \code{mu} out of the
\code{lambda} > \code{mu} offspring.
A plus strategy uses the joint pool of \code{mu} parents and \code{lambda} offspring
for selecting \code{mu} new candidates.
Out of those \code{mu} features, the new \code{lambda} features are generated
by randomly choosing pairs of parents. These are crossed over and \code{crossover.rate}
represents the probability of choosing a feature from the first parent instead of
the second parent.
The resulting offspring is mutated, i.e., its bits are flipped with
probability \code{mutation.rate}. If \code{max.features} is set, offspring are
repeatedly generated until the setting is satisfied.}
}
}
\references{
Ron Kohavi and George H. John,
Wrappers for feature subset selection, Artificial Intelligence Volume 97, 1997, 273-324.
\url{http://ai.stanford.edu/~ronnyk/wrappersPrint.pdf}.\cr
}
\seealso{
Other featsel: 
\code{\link{analyzeFeatSelResult}()},
\code{\link{getFeatSelResult}()},
\code{\link{makeFeatSelWrapper}()},
\code{\link{selectFeatures}()}
}
\concept{featsel}