1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68
|
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/deforest.R
\name{deforest}
\alias{deforest}
\alias{deforest.ranger}
\title{Deforesting a random forest}
\usage{
deforest(object, which.trees = NULL, ...)
\method{deforest}{ranger}(object, which.trees = NULL, warn = TRUE, ...)
}
\arguments{
\item{object}{A fitted random forest (e.g., a \code{\link{ranger}}
object).}
\item{which.trees}{Vector giving the indices of the trees to remove.}
\item{...}{Additional (optional) arguments. (Currently ignored.)}
\item{warn}{Logical indicating whether or not to warn users that some of the
standard output of a typical \code{\link{ranger}} object is no longer
available after deforestation. Default is \code{TRUE}.}
}
\value{
An object of class \code{"deforest.ranger"}; essentially, a
\code{\link{ranger}} object with certain components replaced with
\code{NA}s (e.g., out-of-bag (OOB) predictions, variable importance scores
(if requested), and OOB-based error metrics).
}
\description{
The main purpose of this function is to allow for post-processing of
ensembles via L1 regularized regression (i.e., the LASSO), as described in
Friedman and Popescu (2003). The basic idea is to use the LASSO to
post-process the predictions from the individual base learners in an ensemble
(i.e., decision trees) in the hopes of producing a much smaller model without
sacrificing much in the way of accuracy, and in some cases, improving it.
Friedman and Popescu (2003) describe conditions under which tree-based
ensembles, like random forests, can potentially benefit from such
post-processing (e.g., using shallower trees trained on much smaller samples
of the training data without replacement). However, the computational
benefits of such post-processing can only be realized if the base learners
"zeroed out" by the LASSO can actually be removed from the original ensemble,
hence the purpose of this function. A complete example using
\code{\link{ranger}} can be found at
\url{https://github.com/imbs-hl/ranger/issues/568}.
}
\note{
This function is a generic and can be extended by other packages.
}
\examples{
## Example of deforesting a random forest
rfo <- ranger(Species ~ ., data = iris, probability = TRUE, num.trees = 100)
dfo <- deforest(rfo, which.trees = c(1, 3, 5))
dfo # same as `rfo` but with trees 1, 3, and 5 removed
## Sanity check
preds.rfo <- predict(rfo, data = iris, predict.all = TRUE)$predictions
preds.dfo <- predict(dfo, data = iris, predict.all = TRUE)$predictions
identical(preds.rfo[, , -c(1, 3, 5)], y = preds.dfo)
}
\references{
Friedman, J. and Popescu, B. (2003). Importance sampled learning ensembles,
Technical report, Stanford University, Department of Statistics.
\url{https://jerryfriedman.su.domains/ftp/isle.pdf}.
}
\author{
Brandon M. Greenwell
}
|