File: to_factor.R

package info (click to toggle)
r-cran-datawizard 1.0.1%2Bdfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 2,300 kB
  • sloc: sh: 13; makefile: 2
file content (134 lines) | stat: -rw-r--r-- 3,543 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
#' @title Convert data to factors
#' @name to_factor
#'
#' @details
#' Convert variables or data into factors. If the data is labelled, value labels
#' will be used as factor levels. The counterpart to convert variables into
#' numeric is `to_numeric()`.
#'
#' @param x A data frame or vector.
#' @param labels_to_levels Logical, if `TRUE`, value labels are used as factor
#' levels after `x` was converted to factor. Else, factor levels are based on
#' the values of `x` (i.e. as if using `as.factor()`).
#' @param ... Arguments passed to or from other methods.
#' @inheritParams extract_column_names
#' @inheritParams categorize
#'
#' @inheritSection center Selection of variables - the `select` argument
#'
#' @return A factor, or a data frame of factors.
#'
#' @note Factors are ignored and returned as is. If you want to use value labels
#' as levels for factors, use [`labels_to_levels()`] instead.
#'
#' @examples
#' str(to_factor(iris))
#'
#' # use labels as levels
#' data(efc)
#' str(efc$c172code)
#' head(to_factor(efc$c172code))
#' @export
to_factor <- function(x, ...) {
  UseMethod("to_factor")
}


#' @export
to_factor.default <- function(x, verbose = TRUE, ...) {
  if (isTRUE(verbose)) {
    insight::format_alert(
      sprintf("Converting into factors values currently not possible for variables of class `%s`.", class(x)[1])
    )
  }
  x
}

#' @export
to_factor.factor <- function(x, ...) {
  x
}

#' @rdname to_factor
#' @export
to_factor.numeric <- function(x, labels_to_levels = TRUE, verbose = TRUE, ...) {
  # preserve labels
  variable_label <- attr(x, "label", exact = TRUE)
  value_labels <- attr(x, "labels", exact = TRUE)

  # to factor
  x <- as.factor(x)

  # add back labels
  attr(x, "label") <- variable_label
  attr(x, "labels") <- value_labels

  # value labels to factor levels
  if (labels_to_levels) {
    x <- .value_labels_to_levels(x, verbose = verbose, ...)
  }
  x
}

#' @export
to_factor.logical <- to_factor.numeric

#' @export
to_factor.character <- to_factor.numeric

#' @export
to_factor.Date <- to_factor.numeric

#' @export
to_factor.haven_labelled <- to_factor.numeric

#' @export
to_factor.double <- to_factor.numeric

#' @rdname to_factor
#' @export
to_factor.data.frame <- function(x,
                                 select = NULL,
                                 exclude = NULL,
                                 ignore_case = FALSE,
                                 append = FALSE,
                                 regex = FALSE,
                                 verbose = TRUE,
                                 ...) {
  # validation check, return as is for complete factor
  if (all(vapply(x, is.factor, FUN.VALUE = logical(1L)))) {
    return(x)
  }

  # evaluate arguments
  select <- .select_nse(select,
    x,
    exclude,
    ignore_case,
    regex = regex,
    verbose = verbose
  )

  # when we append variables, we call ".process_append()", which will
  # create the new variables and updates "select", so new variables are processed
  if (!isFALSE(append)) {
    # drop factors, when append is not FALSE
    select <- colnames(x[select])[!vapply(x[select], is.factor, FUN.VALUE = logical(1L))]
    # process arguments
    my_args <- .process_append(
      x,
      select,
      append,
      append_suffix = "_f",
      keep_factors = FALSE,
      keep_character = TRUE,
      preserve_value_labels = TRUE
    )
    # update processed arguments
    x <- my_args$x
    select <- my_args$select
  }

  x[select] <- lapply(x[select], to_factor, verbose = verbose, ...)
  x
}