File: downsample.R

package info (click to toggle)
r-cran-mlr 2.19.2%2Bdfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 8,264 kB
  • sloc: ansic: 65; sh: 13; makefile: 5
file content (39 lines) | stat: -rw-r--r-- 1,321 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
#' @title Downsample (subsample) a task or a data.frame.
#'
#' @description
#' Decrease the observations in a `task` or a `ResampleInstance`
#' to a given percentage of observations.
#'
#' @param obj ([Task] | [ResampleInstance])\cr
#'   Input data or a `ResampleInstance`.
#' @param perc (`numeric(1)`)\cr
#'   Percentage from (0, 1).
#'   Default is 1.
#' @param stratify (`logical(1)`)\cr
#'   Only for classification:
#'   Should the downsampled data be stratified according to the target classes?
#'   Default is `FALSE`.
#' @seealso [makeResampleInstance]
#' @return ([data.frame` | [Task] | [ResampleInstance]). Same type as `obj`.
#' @family downsample
#' @export
downsample = function(obj, perc = 1, stratify = FALSE) {
  assertNumber(perc, lower = 0, upper = 1)
  assertFlag(stratify)
  UseMethod("downsample")
}

#' @export
downsample.Task = function(obj, perc = 1, stratify = FALSE) {
  rin = makeResampleInstance("Holdout", stratify = stratify, split = perc, task = obj)
  subsetTask(task = obj, subset = rin$train.inds[[1L]])
}

#' @export
downsample.ResampleInstance = function(obj, perc = 1, stratify = FALSE) {
  if (stratify) {
    stop("Stratifying is not supported for a ResampleInstance!")
  }
  obj$train.inds = lapply(obj$train.inds, function(x) sample(x, size = length(x) * perc))
  return(obj)
}