File: utils_clustering.R

package info (click to toggle)
r-cran-parameters 0.24.2-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 3,852 kB
  • sloc: sh: 16; makefile: 2
file content (48 lines) | stat: -rw-r--r-- 1,306 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
# Utils -------------------------------------------------------------------

# Prepare a data set for clustering.
#
# Arguments:
#   x                Data to prepare. It is indexed column-wise throughout, so
#                    it is presumably a data frame (confirm against callers).
#   include_factors  If TRUE, ordered factors are converted to numeric with
#                    `datawizard::to_numeric()` and character / factor columns
#                    are replaced by the output of `.factor_to_dummy()`
#                    (defined elsewhere; presumably dummy columns). Otherwise
#                    only numeric columns are kept.
#   standardize      If TRUE, the data is passed to
#                    `datawizard::standardize()`.
#   preprocess       If FALSE, `x` is returned untouched and every other
#                    argument is ignored.
#   ...              Forwarded to `datawizard::standardize()`.
#
# Returns: the prepared data with incomplete rows removed, or `x` itself when
# `preprocess` is FALSE.
#' @keywords internal
.prepare_data_clustering <- function(x,
                                     include_factors = FALSE,
                                     standardize = FALSE,
                                     preprocess = TRUE,
                                     ...) {
  if (isFALSE(preprocess)) {
    return(x)
  }

  if (include_factors) {
    # Ordered factors become numeric. `lapply()` yields one vector per column
    # regardless of how many rows/columns are involved, and `as.numeric()`
    # drops any attributes so the columns are plain doubles.
    is_ordered <- vapply(x, is.ordered, logical(1))
    if (any(is_ordered)) {
      x[is_ordered] <- lapply(x[is_ordered], function(column) {
        as.numeric(
          datawizard::to_numeric(
            column,
            dummy_factors = FALSE,
            preserve_levels = TRUE
          )
        )
      })
    }

    # Character and (remaining) factor columns are replaced by dummies, which
    # are appended after the untouched columns.
    is_categorical <- vapply(
      x,
      function(column) is.character(column) || is.factor(column),
      logical(1)
    )
    if (any(is_categorical)) {
      dummies <- lapply(x[is_categorical], .factor_to_dummy)
      x <- cbind(x[!is_categorical], dummies)
    }
  } else {
    # Factors are not wanted: keep numeric columns only.
    x <- x[vapply(x, is.numeric, logical(1))]
  }

  # Drop every row that contains a missing value.
  x <- stats::na.omit(x)

  if (isTRUE(standardize)) {
    x <- datawizard::standardize(x, ...)
    # remove "dw_transformer" attribute
    x[] <- lapply(x, as.numeric)
  }

  x
}