File: complement.R

package info (click to toggle)
r-cran-rsample 1.1.1%2Bdfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 1,872 kB
  • sloc: sh: 13; makefile: 2
file content (139 lines) | stat: -rw-r--r-- 3,128 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
#' Determine the Assessment Samples
#'
#' This method and function help find which data belong in the analysis and
#' assessment sets.
#'
#' Given an `rsplit` object, `complement()` will determine which
#'   of the data rows are contained in the assessment set. To save space,
#'   many of the `rsplit` objects will not contain indices for the
#'   assessment split.
#'
#' @param x An `rsplit` object
#' @param ... Not currently used
#' @return A integer vector.
#' @seealso [populate()]
#' @examples
#' set.seed(28432)
#' fold_rs <- vfold_cv(mtcars)
#' head(fold_rs$splits[[1]]$in_id)
#' fold_rs$splits[[1]]$out_id
#' complement(fold_rs$splits[[1]])
#' @export
complement <- function(x, ...) {
  UseMethod("complement")
}

#' @export
#' @rdname complement
complement.rsplit <- function(x, ...) {
  if (!is_missing_out_id(x)) {
    return(x$out_id)
  } else {
    (1:nrow(x$data))[-unique(x$in_id)]
  }
}
#' @export
#' @rdname complement
complement.rof_split <- function(x, ...) {
  get_stored_out_id(x)
}
#' @export
#' @rdname complement
complement.sliding_window_split <- function(x, ...) {
  get_stored_out_id(x)
}
#' @export
#' @rdname complement
complement.sliding_index_split <- function(x, ...) {
  get_stored_out_id(x)
}
#' @export
#' @rdname complement
complement.sliding_period_split <- function(x, ...) {
  get_stored_out_id(x)
}

get_stored_out_id <- function(x) {
  out_id <- x$out_id

  if (length(out_id) == 0L) {
    return(out_id)
  }

  if (all(is.na(out_id))) {
    rlang::abort("Cannot derive the assessment set for this type of resampling.")
  }

  out_id
}

#' @export
#' @rdname complement
complement.apparent_split <- function(x, ...) {
  if (!is_missing_out_id(x)) {
    return(x$out_id)
  } else {
    1:nrow(x$data)
  }
}

#' @export
complement.default <- function(x, ...) {
  cls <- paste0("'", class(x), "'", collapse = ", ")
  rlang::abort(
    paste("No `complement()` method for this class(es)", cls)
  )
}

# Get the indices of the analysis set from the assessment set
default_complement <- function(ind, n) {
  list(
    analysis = setdiff(1:n, ind),
    assessment = unique(ind)
  )
}


#' Add Assessment Indices
#'
#' Many `rsplit` and `rset` objects do not contain indicators for
#'   the assessment samples. `populate()` can be used to fill the slot
#'   for the appropriate indices.
#' @param x A `rsplit` and `rset` object.
#' @param ... Not currently used
#' @return An object of the same kind with the integer indices.
#' @examples
#' set.seed(28432)
#' fold_rs <- vfold_cv(mtcars)
#'
#' fold_rs$splits[[1]]$out_id
#' complement(fold_rs$splits[[1]])
#'
#' populate(fold_rs$splits[[1]])$out_id
#'
#' fold_rs_all <- populate(fold_rs)
#' fold_rs_all$splits[[1]]$out_id
#' @export
populate <- function(x, ...) UseMethod("populate")

#' @export
populate.rsplit <- function(x, ...) {
  x$out_id <- complement(x, ...)
  x
}

#' @export
populate.rset <- function(x, ...) {
  x$splits <- map(x$splits, populate)
  x
}

## This will remove the assessment indices from an rsplit object
rm_out <- function(x) {
  x$out_id <- NA
  x
}

is_missing_out_id <- function(x) {
  identical(x$out_id, NA)
}