File: rfcv.Rd

\name{rfcv}
\alias{rfcv}
\title{Random Forest Cross-Validation for Feature Selection}
\description{
This function shows the cross-validated prediction performance of
models with a sequentially reduced number of predictors (ranked by
variable importance) via a nested cross-validation procedure.
}
\usage{
rfcv(trainx, trainy, cv.fold=5, scale="log", step=0.5,
     mtry=function(p) max(1, floor(sqrt(p))), recursive=FALSE, ...)
}
\arguments{
  \item{trainx}{matrix or data frame containing columns of predictor
  variables}
  \item{trainy}{vector of the response; must have length equal to the
  number of rows in \code{trainx}}
  \item{cv.fold}{number of folds in the cross-validation}
  \item{scale}{if \code{"log"}, reduce a fixed proportion (\code{step})
  of variables at each step, otherwise reduce \code{step} variables at a
  time}
  \item{step}{if \code{scale = "log"}, the fraction of variables to remove
  at each step, else remove this many variables at a time (a sketch of the
  resulting schedule is given in the examples)}
  \item{mtry}{a function of the number of remaining predictor variables,
  used as the \code{mtry} parameter in each \code{randomForest} call}
  \item{recursive}{whether variable importance is (re-)assessed at each
  step of variable reduction}
  \item{...}{other arguments passed on to \code{randomForest}}
}
\value{
  A list with the following components:
  \item{n.var}{vector of the number of variables used at each step}
  \item{error.cv}{corresponding vector of error rates (classification) or
    MSEs (regression) at each step}
  \item{predicted}{list with one component per value of \code{n.var}, each
    containing the cross-validated predictions for the corresponding model}
}
%\details{
%}
\references{
Svetnik, V., Liaw, A., Tong, C. and Wang, T., ``Application of Breiman's
Random Forest to Modeling Structure-Activity Relationships of
Pharmaceutical Molecules'', MCS 2004, Roli, F. and Windeatt, T. (Eds.)
pp. 334-343.
}
\seealso{
  \code{\link{randomForest}}, \code{\link{importance}}
}
\examples{
set.seed(647)
## Augment the four real iris predictors with 96 columns of uniform noise.
myiris <- cbind(iris[1:4], matrix(runif(96 * nrow(iris)), nrow(iris), 96))
result <- rfcv(myiris, iris$Species, cv.fold=3)
## Cross-validated error rate versus the number of variables retained.
with(result, plot(n.var, error.cv, log="x", type="o", lwd=2))
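
## Illustrative only (not the internal code): with the default scale="log"
## and step=0.5, the candidate model sizes shrink roughly geometrically,
## about p, p/2, p/4, ... down to 1 variable; the exact schedule used is
## returned in result$n.var.
result$n.var

## 'predicted' has one component per model size, holding the
## cross-validated predictions; e.g., a confusion matrix for the last
## (smallest) model in the schedule:
table(observed = iris$Species,
      predicted = result$predicted[[length(result$n.var)]])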

## The following can take a while to run, so if you really want to try
## it, copy and paste the code into R.

\dontrun{
## Repeat the whole procedure five times and plot the mean CV error
## (thick line) along with the individual replications.
result <- replicate(5, rfcv(myiris, iris$Species), simplify=FALSE)
error.cv <- sapply(result, "[[", "error.cv")
matplot(result[[1]]$n.var, cbind(rowMeans(error.cv), error.cv), type="l",
        lwd=c(2, rep(1, ncol(error.cv))), col=1, lty=1, log="x",
        xlab="Number of variables", ylab="CV Error")
}
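
## A sketch of the remaining arguments (same toy data as above): 'mtry'
## may be supplied as a function of the number of remaining predictors,
## and recursive=TRUE re-assesses variable importance after each
## reduction step.  Slow, hence not run.
\dontrun{
result.rec <- rfcv(myiris, iris$Species, cv.fold=3, step=0.8,
                   mtry=function(p) max(1, floor(sqrt(p))),
                   recursive=TRUE)
with(result.rec, plot(n.var, error.cv, log="x", type="o", lwd=2))
}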
}
\author{Andy Liaw}
\keyword{classif}
\keyword{regression}