File: verb-expand.R

package info (click to toggle)
r-cran-dbplyr 2.3.0%2Bdfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 2,376 kB
  • sloc: sh: 13; makefile: 2
file content (145 lines) | stat: -rw-r--r-- 4,859 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
#' Expand SQL tables to include all possible combinations of values
#'
#' @description
#' This is a method for the [tidyr::expand] generics. It doesn't sort the
#' result explicitly, so the order might be different to what `expand()`
#' returns for data frames.
#'
#' @param data A lazy data frame backed by a database query.
#' @param ... Specification of columns to expand. See [tidyr::expand] for
#' more details.
#' @inheritParams tibble::as_tibble
#' @inherit arrange.tbl_lazy return
#' @examplesIf rlang::is_installed("tidyr", version = "1.0.0")
#' fruits <- memdb_frame(
#'   type   = c("apple", "orange", "apple", "orange", "orange", "orange"),
#'   year   = c(2010, 2010, 2012, 2010, 2010, 2012),
#'   size = c("XS", "S",  "M", "S", "S", "M"),
#'   weights = rnorm(6)
#' )
#'
#' # All possible combinations ---------------------------------------
#' fruits %>% tidyr::expand(type)
#' fruits %>% tidyr::expand(type, size)
#'
#' # Only combinations that already appear in the data ---------------
#' fruits %>% tidyr::expand(nesting(type, size))
expand.tbl_lazy <- function(data, ..., .name_repair = "check_unique") {
  dots <- purrr::discard(quos(...), quo_is_null)

  if (is_empty(dots)) {
    cli_abort("Must supply variables in `...`")
  }

  distinct_tbl_vars <- purrr::map(dots, extract_expand_dot_vars, call = current_env())

  # now that `nesting()` has been unpacked resolve name conflicts
  out_names <- names(exprs_auto_name(purrr::flatten(distinct_tbl_vars)))
  out_names_repaired <- vctrs::vec_as_names(out_names, repair = .name_repair)

  ns <- lengths(distinct_tbl_vars)
  indices <- vctrs::vec_rep_each(seq_along(distinct_tbl_vars), ns)
  out_names_list <- vctrs::vec_split(out_names_repaired, indices)$val

  distinct_tables <- purrr::map2(
    distinct_tbl_vars, out_names_list,
    ~ {
      args <- set_names(.x, .y)
      distinct(data, !!!args)
    }
  )

  by <- group_vars(data)
  if (is_empty(by)) {
    # We want a cross join if `by` is empty which is done with
    # a `full_join()` and `by = character()`
    purrr::reduce(distinct_tables, full_join, by = character())
  } else {
    # In this case a `full_join()` and a `left_join()` produce the same result
    # but a `left_join()` produces much nicer SQL for SQLite
    purrr::reduce(distinct_tables, left_join, by = group_vars(data))
  }
}

extract_expand_dot_vars <- function(dot, call) {
  # ugly hack to deal with `nesting()`
  if (!quo_is_call(dot, name = "nesting", ns = c("", "tidyr"))) {
    return(list(quo_get_expr(dot)))
  }

  x_expr <- quo_get_expr(dot)
  args <- call_args(x_expr)

  repair <- args[[".name_repair"]] %||% "check_unique"
  args[[".name_repair"]] <- NULL

  args_named <- exprs_auto_name(args)
  nms <- vctrs::vec_as_names(names(args_named), repair = repair, call = call)
  exprs(!!!set_names(args_named, nms))
}

#' Complete a SQL table with missing combinations of data
#'
#' Turns implicit missing values into explicit missing values. This is a method
#' for the [tidyr::complete()] generic.
#'
#' @inheritParams expand.tbl_lazy
#' @param fill A named list that for each variable supplies a single value to
#' use instead of NA for missing combinations.
#'
#' @inherit arrange.tbl_lazy return
#'
#' @examplesIf rlang::is_installed("tidyr", version = "1.0.0")
#' df <- memdb_frame(
#'   group = c(1:2, 1),
#'   item_id = c(1:2, 2),
#'   item_name = c("a", "b", "b"),
#'   value1 = 1:3,
#'   value2 = 4:6
#' )
#'
#' df %>% tidyr::complete(group, nesting(item_id, item_name))
#'
#' # You can also choose to fill in missing values
#' df %>% tidyr::complete(group, nesting(item_id, item_name), fill = list(value1 = 0))
complete.tbl_lazy <- function(data, ..., fill = list()) {
  full <- tidyr::expand(data, ...)

  full <- full_join(full, data, by = colnames(full))
  tidyr::replace_na(full, replace = fill)
}

#' Replace NAs with specified values
#'
#' This is a method for the [tidyr::replace_na()] generic.
#'
#' @param data A pair of lazy data frame backed by database queries.
#' @param replace A named list of values, with one value for each column that
#' has NA values to be replaced.
#' @param ... Unused; included for compatibility with generic.
#'
#' @inherit arrange.tbl_lazy return
#'
#' @examplesIf rlang::is_installed("tidyr", version = "1.0.0")
#' df <- memdb_frame(x = c(1, 2, NA), y = c("a", NA, "b"))
#' df %>% tidyr::replace_na(list(x = 0, y = "unknown"))
replace_na.tbl_lazy <- function(data, replace = list(), ...) {
  stopifnot(is_list(replace))
  stopifnot(is_empty(replace) || is_named(replace))
  replace <- replace[names(replace) %in% colnames(data)]

  if (is_empty(replace)) {
    return(data)
  }

  coalesce_expr <- purrr::imap(
    replace,
    function(value, col) {
      quo(coalesce(!!sym(col), !!value))
    }
  )

  mutate(data, !!!coalesce_expr)
}

globalVariables(c("coalesce", "expand", "replace_na"))