File: holdoutRF.R

package info (click to toggle)
r-cran-ranger 0.17.0-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 1,160 kB
  • sloc: cpp: 8,324; sh: 13; makefile: 5; ansic: 2
file content (88 lines) | stat: -rw-r--r-- 3,143 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
# -------------------------------------------------------------------------------
#   This file is part of Ranger.
#
# Ranger is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Ranger is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Ranger. If not, see <http://www.gnu.org/licenses/>.
#
# Written by:
#
#   Marvin N. Wright
# Institut fuer Medizinische Biometrie und Statistik
# Universitaet zu Luebeck
# Ratzeburger Allee 160
# 23562 Luebeck
# Germany
#
# http://www.imbs-luebeck.de
# -------------------------------------------------------------------------------

##' Grow two random forests on two cross-validation folds. 
##' Instead of out-of-bag data, the other fold is used to compute permutation importance.
##' Related to the novel permutation variable importance by Janitza et al. (2015).
##'
##' Each observation is assigned to one of the two folds at random with probability 0.5. 
##' The fold membership is passed to \code{\link{ranger}()} as 0/1 case weights together with \code{holdout = TRUE} and \code{replace = FALSE}; the second forest receives the complementary weights.
##' The number of observations is taken from the argument named \code{data}, otherwise from the argument named \code{x}, otherwise from the second argument. 
##'
##' @title Hold-out random forests
##' @param ... All arguments are passed to \code{\link{ranger}()} (except \code{importance}, \code{case.weights}, \code{replace} and \code{holdout}). 
##' @return Hold-out random forests with variable importance: an object of class \code{holdoutRF}, a list with the two forests \code{rf1} and \code{rf2}, the element-wise mean of their variable importances in \code{variable.importance}, and \code{treetype} and \code{importance.mode} copied from the first forest.
##' @seealso \code{\link{ranger}}
##' @author Marvin N. Wright
##' @references
##'   Janitza, S., Celik, E. & Boulesteix, A.-L., (2015). A computationally fast variable importance test for random forests for high-dimensional data. Adv Data Anal Classif \doi{10.1007/s11634-016-0276-4}. \cr
##' @export 
holdoutRF <- function(...) {
  
  args <- list(...)
  arg.names <- names(args)
  
  ## Check args first, so that an unsupported argument is always reported as
  ## such and never masked by a failure while looking up the data
  if ("case.weights" %in% arg.names) {
    stop("Error: Argument 'case.weights' not supported in holdoutRF.")
  }
  if ("holdout" %in% arg.names) {
    stop("Error: Argument 'holdout' not supported in holdoutRF.")
  }
  if ("importance" %in% arg.names) {
    stop("Error: Argument 'importance' not supported in holdoutRF. Always set to 'permutation'.")
  }
  if ("replace" %in% arg.names) {
    stop("Error: Argument 'replace' not supported in holdoutRF.")
  }
  
  ## Get data from arguments: named 'data' first, then named 'x' (x/y
  ## interface), finally the second argument (formula, data given by position).
  ## '[[' matches names exactly and returns NULL if the name is absent.
  data <- args[["data"]]
  if (is.null(data)) {
    data <- args[["x"]]
  }
  if (is.null(data) && length(args) >= 2) {
    data <- args[[2]]
  }
  
  ## Number of observations
  if (inherits(data, "gwaa.data")) {
    n <- nrow(data@phdata) 
  } else {
    n <- nrow(data)
  }
  if (length(n) != 1 || is.na(n)) {
    ## nrow() returns NULL for objects without dimensions (or missing data)
    stop("Error: Could not determine the number of observations in holdoutRF. Provide 'data' or 'x' with one row per observation.")
  }
  
  ## Split data: 0/1 fold indicator, one draw per observation
  weights <- rbinom(n, 1, 0.5)
  
  ## Grow RFs, the second one on the complementary fold
  res <- list(
    rf1 = ranger(..., importance = "permutation",  
                 case.weights = weights, replace = FALSE, holdout = TRUE),
    rf2 = ranger(..., importance = "permutation",
                 case.weights = 1-weights, replace = FALSE, holdout = TRUE)
  )
  
  ## Compute importance as the mean over both forests
  res$variable.importance <- (res$rf1$variable.importance + res$rf2$variable.importance)/2
  res$treetype <- res$rf1$treetype
  res$importance.mode <- res$rf1$importance.mode
  class(res) <- "holdoutRF"
  
  res
}