File: data_replicate.R

package info (click to toggle)
r-cran-datawizard 1.0.1%2Bdfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 2,300 kB
  • sloc: sh: 13; makefile: 2
file content (120 lines) | stat: -rw-r--r-- 3,857 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
#' @title Expand (i.e. replicate rows) a data frame
#' @name data_replicate
#'
#' @description
#' Expand a data frame by replicating rows based on another variable that
#' contains the counts of replications per row.
#'
#' @param data A data frame.
#' @param expand The name of the column that contains the counts of replications
#' for each row. Can also be a numeric value, indicating the position of that
#' column. Note that the variable indicated by `expand` must be an integer vector.
#' @param remove_na Logical. If `TRUE`, missing values in the column
#' provided in `expand` are removed from the data frame. If `FALSE` and `expand`
#' contains missing values, the function will throw an error.
#' @param ... Currently not used.
#' @inheritParams extract_column_names
#'
#' @return A dataframe with each row replicated as many times as defined in `expand`.
#'
#' @examples
#' data(mtcars)
#' data_replicate(head(mtcars), "carb")
#' @export
data_replicate <- function(data,
                           expand = NULL,
                           select = NULL,
                           exclude = NULL,
                           remove_na = FALSE,
                           ignore_case = FALSE,
                           verbose = TRUE,
                           regex = FALSE,
                           ...) {
  # we need a name for the new column
  if (is.null(expand)) {
    insight::format_error(
      "No column that should be used to expand the data frame was provided. Please use `expand` to define a column."
    )
  }

  # only one column name
  if (length(expand) > 1) {
    insight::format_error(
      "Please provide only a single string for `expand`, no character vector with multiple values."
    )
  }

  # check if numerics, and if so, use column name
  if (is.numeric(expand)) {
    expand <- colnames(data)[expand]
  }

  # check if in data
  if (!expand %in% colnames(data)) {
    insight::format_error(
      "The column provided in `expand` does not exist in the data frame.",
      .misspelled_string(colnames(data), expand, "Possibly misspelled?")
    )
  }

  # check that "expand" contains no Inf
  if (any(is.infinite(data[[expand]]))) {
    insight::format_error(
      "The column provided in `expand` contains infinite values. Please provide a column that does not contain infinite values." # nolint
    )
  }

  # check that "expand" is integer
  if (!.is_integer(data[[expand]])) {
    insight::format_error(
      "The column provided in `expand` is not of type integer. Please provide a column that contains integer values." # nolint
    )
  }

  # evaluate select/exclude, may be select-helpers
  select <- .select_nse(select,
    data,
    exclude,
    ignore_case,
    regex = regex,
    verbose = verbose
  )

  # extract variable that contains the counts of replicates
  replicates <- data[[expand]]
  # we can remove that column now
  data[[expand]] <- NULL

  # also remove "expand" from "select" string
  select <- setdiff(select, expand)

  # if user doesn't want to remove "NA", but replicates contain "NA",
  # give informative error here
  if (!remove_na && anyNA(replicates)) {
    insight::format_error(
      "The column provided in `expand` contains missing values, but `remove_na` is set to `FALSE`.",
      "Please set `remove_na` to `TRUE` or remove the missing values from the `expand` variable."
    )
  }

  # remove rows where "expand" is NA
  data <- data[!is.na(replicates), ]
  replicates <- replicates[!is.na(replicates)]

  # fin
  as.data.frame(do.call(cbind, lapply(data[select], rep.int, times = replicates)))
}


# is.integer(c(1, 2)) -> FALSE
# all(c(1, 2) %% 1 == 0) -> TRUE
.is_integer <- function(x, remove_na = TRUE) {
  if (remove_na) {
    x <- x[!is.na(x)]
  }
  tryCatch(
    all(x %% 1 == 0),
    warning = function(w) is.integer(x),
    error = function(e) FALSE
  )
}