File: verb-filter.R

package info (click to toggle)
r-cran-dbplyr 2.3.0%2Bdfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 2,376 kB
  • sloc: sh: 13; makefile: 2
file content (145 lines) | stat: -rw-r--r-- 4,187 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
#' Subset rows using column values
#'
#' This is a method for the dplyr [filter()] generic. It generates the
#' `WHERE` clause of the SQL query.
#'
#' @inheritParams arrange.tbl_lazy
#' @inheritParams dplyr::filter
#' @inheritParams args_by
#' @param .preserve Not supported by this method.
#' @inherit arrange.tbl_lazy return
#' @examples
#' library(dplyr, warn.conflicts = FALSE)
#'
#' db <- memdb_frame(x = c(2, NA, 5, NA, 10), y = 1:5)
#' db %>% filter(x < 5) %>% show_query()
#' db %>% filter(is.na(x)) %>% show_query()
# registered onLoad
#' @importFrom dplyr filter
filter.tbl_lazy <- function(.data, ..., .by = NULL, .preserve = FALSE) {
  if (!identical(.preserve, FALSE)) {
    cli_abort("{.arg .preserve} is not supported on database backends")
  }
  check_filter(...)
  by <- compute_by(
    {{ .by }},
    .data,
    by_arg = ".by",
    data_arg = ".data",
    error_call = caller_env()
  )
  if (by$from_by) {
    .data$lazy_query$group_vars <- by$names
  }

  dots <- partial_eval_dots(.data, ..., .named = FALSE)

  if (is_empty(dots)) {
    return(.data)
  }

  .data$lazy_query <- add_filter(.data, dots)
  if (by$from_by) {
    .data$lazy_query$group_vars <- character()
  }
  .data
}

add_filter <- function(.data, dots) {
  con <- remote_con(.data)
  lazy_query <- .data$lazy_query
  dots <- unname(dots)

  dots_use_window_fun <- uses_window_fun(dots, con)

  if (filter_can_use_having(lazy_query, dots_use_window_fun)) {
    return(filter_via_having(lazy_query, dots))
  }

  if (!uses_window_fun(dots, con)) {
    if (uses_mutated_vars(dots, lazy_query$select)) {
      lazy_select_query(
        x = lazy_query,
        where = dots
      )
    } else {
      exprs <- lazy_query$select$expr
      nms <- lazy_query$select$name
      projection <- purrr::map2_lgl(exprs, nms, ~ is_symbol(.x) && !identical(.x, sym(.y)))

      if (any(projection)) {
        dots <- purrr::map(dots, replace_sym, nms[projection], exprs[projection])
      }

      lazy_query$where <- c(lazy_query$where, dots)
      lazy_query
    }
  } else {
    # Do partial evaluation, then extract out window functions
    where <- translate_window_where_all(dots, ls(dbplyr_sql_translation(con)$window))

    # Add extracted window expressions as columns
    mutated <- mutate(.data, !!!where$comp)

    # And filter with the modified `where` using the new columns
    original_vars <- op_vars(.data)
    lazy_select_query(
      x = mutated$lazy_query,
      select = syms(set_names(original_vars)),
      where = where$expr
    )
  }
}

filter_can_use_having <- function(lazy_query, dots_use_window_fun) {
  # From the Postgres documentation: https://www.postgresql.org/docs/current/sql-select.html#SQL-HAVING
  # Each column referenced in condition must unambiguously reference a grouping
  # column, unless the reference appears within an aggregate function or the
  # ungrouped column is functionally dependent on the grouping columns.

  # After `summarise()` every column is either
  # * a grouping column
  # * or an aggregated column
  # (this is not the case for data frames but valid for SQL tables)
  #
  # Therefore, if `filter()` does not use a window function, then we only use
  # grouping or aggregated columns

  if (dots_use_window_fun) {
    return(FALSE)
  }

  if (!inherits(lazy_query, "lazy_select_query")) {
    return(FALSE)
  }

  lazy_query$select_operation == "summarise"
}

filter_via_having <- function(lazy_query, dots) {
  names <- lazy_query$select$name
  exprs <- lazy_query$select$expr
  dots <- purrr::map(dots, replace_sym, names, exprs)

  lazy_query$having <- c(lazy_query$having, dots)
  lazy_query
}

check_filter <- function(...) {
  dots <- enquos(...)
  named <- have_name(dots)

  for (i in which(named)) {
    quo <- dots[[i]]

    # Unlike in `dplyr` named logical vectors do not make sense so they are
    # also not allowed
    expr <- quo_get_expr(quo)
    cli_abort(c(
      "Problem with {.fun filter} input `..{i}`.",
      x = "Input `..{i}` is named.",
      i = "This usually means that you've used {.code =} instead of {.code ==}.",
      i = "Did you mean `{names(dots)[i]} == {as_label(expr)}`?"
    ), call = caller_env())
  }
}