| 12
 3
 4
 5
 6
 7
 8
 9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
 100
 101
 102
 103
 104
 105
 
 | \name{partialPlot}
\alias{partialPlot}
\alias{partialPlot.default}
\alias{partialPlot.randomForest}
\title{Partial dependence plot}
\description{
Partial dependence plot gives a graphical depiction of the marginal
effect of a variable on the class probability (classification) or
response (regression).
}
\usage{
\method{partialPlot}{randomForest}(x, pred.data, x.var, which.class,
      w, plot = TRUE, add = FALSE,
      n.pt = min(length(unique(pred.data[, xname])), 51),
      rug = TRUE, xlab=deparse(substitute(x.var)), ylab="",
      main=paste("Partial Dependence on", deparse(substitute(x.var))),
      ...)
}
\arguments{
  \item{x}{an object of class \code{randomForest}, which contains a
    \code{forest} component.}
  \item{pred.data}{a data frame used for constructing the plot, usually
    the training data used to construct the random forest.}
  \item{x.var}{name of the variable for which partial
    dependence is to be examined.}
  \item{which.class}{For classification data, the class to focus on
    (default the first class).}
  \item{w}{weights to be used in averaging; if not supplied, the mean is
  not weighted.}
  \item{plot}{whether the plot should be shown on the graphic device.}
  \item{add}{whether to add to an existing plot (\code{TRUE}) rather
    than start a new one.}
  \item{n.pt}{if \code{x.var} is continuous, the number of points on the
    grid for evaluating partial dependence.}
  \item{rug}{whether to draw hash marks at the bottom of the plot
    indicating the deciles of \code{x.var}.}
  \item{xlab}{label for the x-axis.}
  \item{ylab}{label for the y-axis.}
  \item{main}{main title for the plot.}
  \item{...}{other graphical parameters to be passed on to \code{plot}
    or \code{lines}.}
}
\value{
A list with two components: \code{x} and \code{y}, which are the values
used in the plot.
}
\details{
  The function being plotted is defined as:
  \deqn{
    \tilde{f}(x) = \frac{1}{n} \sum_{i=1}^n f(x, x_{iC}),
  }
  where \eqn{x} is the variable for which partial dependence is sought,
  and \eqn{x_{iC}} are the other variables in the data.  The summand is
  the predicted regression function for regression, and logits
  (i.e., log of fraction of votes) for \code{which.class} for
  classification:
\deqn{ f(x) = \log p_k(x) - \frac{1}{K} \sum_{j=1}^K \log p_j(x),}
where \eqn{K} is the number of classes, \eqn{k} is \code{which.class},
and \eqn{p_j} is the proportion of votes for class \eqn{j}.
}
\note{
  The \code{randomForest} object must contain the \code{forest}
  component; i.e., created with \code{randomForest(...,
    keep.forest=TRUE)}.
  This function runs quite slowly for large data sets.
}
\references{
Friedman, J. (2001). Greedy function approximation: a gradient
boosting machine, \emph{Annals of Statistics}, 29(5), 1189-1232.}
\seealso{\code{\link{randomForest}}}
\author{Andy Liaw \email{andy_liaw@merck.com}}
\examples{
## Classification example: partial dependence of the class versicolor
## on Petal.Width, using the training data as pred.data.
data(iris)
## Fix the random seed so the fitted forest is reproducible.
set.seed(543)
iris.rf <- randomForest(Species~., iris)
partialPlot(iris.rf, iris, Petal.Width, "versicolor")
## Looping over variables ranked by importance:
data(airquality)
## Drop rows with missing values before fitting.
airquality <- na.omit(airquality)
set.seed(131)
ozone.rf <- randomForest(Ozone ~ ., airquality, importance=TRUE)
imp <- importance(ozone.rf)
## Variable names ordered by the first column of the importance matrix,
## largest value first.
impvar <- rownames(imp)[order(imp[, 1], decreasing=TRUE)]
## Lay the plots out on a 2 x 3 grid; op keeps the previous settings.
op <- par(mfrow=c(2, 3))
for (i in seq_along(impvar)) {
    ## xlab and main are given explicitly because their defaults deparse
    ## the x.var expression, which here would read impvar[i] instead of
    ## the variable name.  ylim is passed on through the dots argument so
    ## that all panels share the same y-axis range.
    partialPlot(ozone.rf, airquality, impvar[i], xlab=impvar[i],
                main=paste("Partial Dependence on", impvar[i]),
                ylim=c(30, 70))
}
## Restore the graphical parameters saved above.
par(op)
}
\keyword{classif}
\keyword{regression}
\keyword{tree}
 |