File: gather.R

package info (click to toggle)
r-cran-rsample 0.0.8-1
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 1,696 kB
  • sloc: sh: 13; makefile: 2
file content (64 lines) | stat: -rw-r--r-- 2,117 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
#' Gather an `rset` Object
#'
#' This method uses `gather` on an `rset` object to stack all of
#'  the non-ID or split columns in the data and is useful for
#'  stacking model evaluation statistics. The resulting data frame
#'  has a column based on the column names of `data` and another for
#'  the values.
#'
#' Any column named `splits` is dropped before stacking, and every
#'  column whose name starts with `id` is treated as an ID column and
#'  kept as-is. At least two other columns must remain; otherwise an
#'  error is raised.
#'
#' @param data An `rset` object.
#' @param key,value,... Not specified in this method and will be
#'  ignored. Note that this means that selectors are ignored if
#'  they are passed to the function.
#' @param na.rm If `TRUE` (the default for this method), will remove
#'  rows from output where the value column is `NA`.
#' @param convert If `TRUE` will automatically run
#'  `type.convert()` on the key column. This is useful if the column
#'  names are actually numeric, integer, or logical.
#' @param factor_key If `TRUE`, the default for this method, the key
#'  values will be stored as a factor, which preserves the original
#'  ordering of the columns. If `FALSE`, they will be stored as a
#'  character vector.
#' @return A data frame with the ID columns, a column called
#'  `model` (with the previous column names), and a column called
#'  `statistic` (with the values).
#' @examples
#' library(rsample)
#' cv_obj <- vfold_cv(mtcars, v = 10)
#' cv_obj$lm_rmse <- rnorm(10, mean = 2)
#' cv_obj$nnet_rmse <- rnorm(10, mean = 1)
#' gather(cv_obj)
#' @export gather.rset
#' @export
#' @method gather rset
gather.rset <- function(data, key = NULL, value = NULL, ..., na.rm = TRUE,
                        convert = FALSE, factor_key = TRUE) {
  # `key`, `value` and `...` are accepted only for compatibility with the
  # `gather()` generic; the output columns are always `model` and `statistic`.

  # The resampling splits are not statistics, so they are never stacked.
  if ("splits" %in% names(data)) {
    data <- dplyr::select(data, -splits)
  }

  data <- as.data.frame(data)

  # Partition the columns: anything whose name starts with "id" is an
  # identifier, everything else is a candidate statistic column.
  col_names <- names(data)
  is_id <- grepl("^id", col_names)
  id_vars <- col_names[is_id]
  other_vars <- col_names[!is_id]

  if (length(other_vars) < 2) {
    rlang::abort(
      paste0(
        "There should be at least two other columns ",
        "(besides `id` variables) in the data set to ",
        "use `gather.rset()`."
      )
    )
  }

  # Stack every non-ID column; the ID columns are excluded from the
  # selection by unquoting their names into a negative selector.
  tidyr::gather(
    data,
    key = model,
    value = statistic,
    - !!id_vars,
    na.rm = na.rm,
    convert = convert,
    factor_key = factor_key
  )
}