1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134
|
#' @title Convert data to factors
#' @name to_factor
#'
#' @details
#' Convert variables or data into factors. If the data is labelled, value labels
#' will be used as factor levels. The counterpart to convert variables into
#' numeric is `to_numeric()`.
#'
#' @param x A data frame or vector.
#' @param labels_to_levels Logical, if `TRUE`, value labels are used as factor
#' levels after `x` was converted to factor. Else, factor levels are based on
#' the values of `x` (i.e. as if using `as.factor()`).
#' @param ... Arguments passed to or from other methods.
#' @inheritParams extract_column_names
#' @inheritParams categorize
#'
#' @inheritSection center Selection of variables - the `select` argument
#'
#' @return A factor, or a data frame of factors.
#'
#' @note Factors are ignored and returned as is. If you want to use value labels
#' as levels for factors, use [`labels_to_levels()`] instead.
#'
#' @examples
#' str(to_factor(iris))
#'
#' # use labels as levels
#' data(efc)
#' str(efc$c172code)
#' head(to_factor(efc$c172code))
#' @export
to_factor <- function(x, ...) {
UseMethod("to_factor")
}
#' @export
to_factor.default <- function(x, verbose = TRUE, ...) {
if (isTRUE(verbose)) {
insight::format_alert(
sprintf("Converting into factors values currently not possible for variables of class `%s`.", class(x)[1])
)
}
x
}
#' @export
to_factor.factor <- function(x, ...) {
x
}
#' @rdname to_factor
#' @export
to_factor.numeric <- function(x, labels_to_levels = TRUE, verbose = TRUE, ...) {
# preserve labels
variable_label <- attr(x, "label", exact = TRUE)
value_labels <- attr(x, "labels", exact = TRUE)
# to factor
x <- as.factor(x)
# add back labels
attr(x, "label") <- variable_label
attr(x, "labels") <- value_labels
# value labels to factor levels
if (labels_to_levels) {
x <- .value_labels_to_levels(x, verbose = verbose, ...)
}
x
}
#' @export
to_factor.logical <- to_factor.numeric
#' @export
to_factor.character <- to_factor.numeric
#' @export
to_factor.Date <- to_factor.numeric
#' @export
to_factor.haven_labelled <- to_factor.numeric
#' @export
to_factor.double <- to_factor.numeric
#' @rdname to_factor
#' @export
to_factor.data.frame <- function(x,
select = NULL,
exclude = NULL,
ignore_case = FALSE,
append = FALSE,
regex = FALSE,
verbose = TRUE,
...) {
# validation check, return as is for complete factor
if (all(vapply(x, is.factor, FUN.VALUE = logical(1L)))) {
return(x)
}
# evaluate arguments
select <- .select_nse(select,
x,
exclude,
ignore_case,
regex = regex,
verbose = verbose
)
# when we append variables, we call ".process_append()", which will
# create the new variables and updates "select", so new variables are processed
if (!isFALSE(append)) {
# drop factors, when append is not FALSE
select <- colnames(x[select])[!vapply(x[select], is.factor, FUN.VALUE = logical(1L))]
# process arguments
my_args <- .process_append(
x,
select,
append,
append_suffix = "_f",
keep_factors = FALSE,
keep_character = TRUE,
preserve_value_labels = TRUE
)
# update processed arguments
x <- my_args$x
select <- my_args$select
}
x[select] <- lapply(x[select], to_factor, verbose = verbose, ...)
x
}
|