File: tuneRF.R

package info (click to toggle)
r-cran-foreach 1.5.2-1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, forky, sid, trixie
  • size: 648 kB
  • sloc: makefile: 2
file content (43 lines) | stat: -rw-r--r-- 1,267 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
# tuning random forest over mtry parameter in parallel

library(foreach)
library(randomForest)

# Build an iterator over candidate values for the mtry argument.
#
# Arguments:
#   from       - first value to emit
#   to         - inclusive upper bound; iteration ends once the next
#                candidate exceeds it
#   stepFactor - multiplier applied to the current value to obtain the next
#                candidate (the product is rounded up with ceiling())
#
# Returns a list holding a single function, `nextElem`, with class
# c('abstractiter', 'iter'). Each call to nextElem() returns the current
# value and advances the internal state; once the state exceeds `to`, it
# signals an error whose message is exactly 'StopIteration'.
mtryiter <- function(from, to, stepFactor=2) {
  nextEl <- function() {
    if (from > to) stop('StopIteration')
    i <- from
    nxt <- ceiling(from * stepFactor)
    # Guarantee forward progress: with stepFactor <= 1 or from <= 0 the scaled
    # value does not exceed the current one, so the iterator would otherwise
    # emit values forever and never reach 'StopIteration'.
    if (!is.na(nxt) && nxt <= from) nxt <- from + 1
    # `<<-` updates the `from` captured by this closure, which is the
    # iterator's only state.
    from <<- nxt
    i
  }
  obj <- list(nextElem=nextEl)
  class(obj) <- c('abstractiter', 'iter')
  obj
}

# forest sizes (numbers of trees) at which the error is sampled; this is the
# default for the `ntree` argument of tune(), which grows max(ntree) trees
# per forest and reads the error at each of these indices
vntree <- c(25, 50, 100, 200, 500, 1000)

# Collect random forest error estimates over a grid of mtry values, fitting
# one forest per mtry value via foreach/%dopar%.
#
# Arguments:
#   x           - predictors, passed to randomForest(); ncol(x) is the upper
#                 bound of the mtry values tried
#   y           - response, passed to randomForest(); when it is a factor the
#                 error is read from the first column of `err.rate`,
#                 otherwise from `mse`
#   ntree       - indices (forest sizes) at which the error is read; every
#                 forest is grown with max(ntree) trees
#   mtry,
#   keep.forest - present in the signature but never read by the body; as
#                 named formals they keep caller-supplied values out of `...`
#                 (presumably so they cannot clash with the explicit
#                 arguments given to randomForest() below)
#   ...         - forwarded to randomForest()
#
# Returns the row-bound data frames produced per forest, with columns
# `ntree`, `mtry` and `error`.
tune <- function(x, y, ntree=vntree, mtry=NULL, keep.forest=FALSE, ...) {
  # choose how to read the error series, depending on the kind of response
  errorAt <- if (is.factor(y)) {
    function(rf) rf$err.rate[ntree, 1]
  } else {
    function(rf) rf$mse[ntree]
  }

  # combiner: append the rows for one fitted forest to the accumulated table
  comb <- function(acc, rf) {
    rows <- data.frame(ntree=ntree, mtry=rf$mtry, error=errorAt(rf))
    rbind(acc, rows)
  }

  candidates <- mtryiter(1, ncol(x))
  foreach(mtry=candidates, .combine=comb, .init=NULL,
          .packages='randomForest') %dopar% {
    randomForest(x, y, ntree=max(ntree), mtry=mtry, keep.forest=FALSE, ...)
  }
}

# synthetic inputs: 100 rows by 20 columns of uniform random predictors and a
# two-level factor response with 50 observations per level
x <- matrix(runif(100 * 20), nrow=100, ncol=20)
y <- gl(n=2, k=50)

# fit the forests over the mtry grid, then show the collected error table
results <- tune(x, y)
print(results)