File: recode.R

package info (click to toggle)
r-cran-forcats 1.0.0-1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, forky, sid, trixie
  • size: 900 kB
  • sloc: makefile: 2
file content (77 lines) | stat: -rw-r--r-- 2,373 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
#' Change factor levels by hand
#'
#' @param .f A factor (or character vector).
#' @param ... <[`dynamic-dots`][rlang::dyn-dots]> A sequence of named character
#'   vectors where the name gives the new level, and the value gives the old
#'   level. Levels not otherwise mentioned will be left as is. Levels can
#'   be removed by naming them `NULL`.
#' @export
#' @examples
#' x <- factor(c("apple", "bear", "banana", "dear"))
#' fct_recode(x, fruit = "apple", fruit = "banana")
#'
#' # If you make a mistake you'll get a warning
#' fct_recode(x, fruit = "apple", fruit = "bananana")
#'
#' # If you name the level NULL it will be removed
#' fct_recode(x, NULL = "apple", fruit = "banana")
#'
#' # Wrap the left hand side in quotes if it contains special variables
#' fct_recode(x, "an apple" = "apple", "a bear" = "bear")
#'
#' # When passing a named vector to rename levels use !!! to splice
#' x <- factor(c("apple", "bear", "banana", "dear"))
#' levels <- c(fruit = "apple", fruit = "banana")
#' fct_recode(x, !!!levels)
fct_recode <- function(.f, ...) {
  f <- check_factor(.f)

  new_levels <- check_recode_levels(...)

  # Remove any named NULL and finish if all NULLs
  nulls <- names(new_levels) == "NULL"
  if (any(nulls)) {
    f <- factor(f, levels = setdiff(levels(f), new_levels[nulls]))
    new_levels <- new_levels[!nulls]
  }

  lvls_revalue(f, lvls_rename(f, new_levels))
}

lvls_rename <- function(f, new_levels) {
  # Match old levels with new levels
  old_levels <- levels(f)
  idx <- match(new_levels, old_levels)

  # Handle levels that don't exist
  if (any(is.na(idx))) {
    bad <- new_levels[is.na(idx)]
    warning("Unknown levels in `f`: ", paste(bad, collapse = ", "), call. = FALSE)

    new_levels <- new_levels[!is.na(idx)]
    idx <- idx[!is.na(idx)]
  }

  old_levels[idx] <- names(new_levels)
  old_levels
}

check_recode_levels <- function(..., call = caller_env()) {
  levels <- list2(...)

  is_ok <- function(x) is.character(x) && length(x) == 1
  ok <- vapply(levels, is_ok, logical(1)) & names2(levels) != ""

  if (!all(ok)) {
    indx <- names2(levels)
    indx[indx == ""] <- seq_along(levels)[indx == ""]
    problems <- indx[!ok]

    cli::cli_abort(c(
      "Each element of {.arg ...} must be a named string.",
      i = "Problems with {length(problems)} argument{?s}: {problems}"
    ), call = call)
  }

  unlist(levels)
}