1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145
|
#' Subset rows using column values
#'
#' This is a method for the dplyr [filter()] generic. It generates the
#' `WHERE` clause of the SQL query.
#'
#' @inheritParams arrange.tbl_lazy
#' @inheritParams dplyr::filter
#' @inheritParams args_by
#' @param .preserve Not supported by this method.
#' @inherit arrange.tbl_lazy return
#' @examples
#' library(dplyr, warn.conflicts = FALSE)
#'
#' db <- memdb_frame(x = c(2, NA, 5, NA, 10), y = 1:5)
#' db %>% filter(x < 5) %>% show_query()
#' db %>% filter(is.na(x)) %>% show_query()
# registered onLoad
#' @importFrom dplyr filter
filter.tbl_lazy <- function(.data, ..., .by = NULL, .preserve = FALSE) {
  # `.preserve` is accepted only for signature compatibility with the dplyr
  # generic; any value other than the default `FALSE` is rejected.
  if (!identical(.preserve, FALSE)) {
    cli_abort("{.arg .preserve} is not supported on database backends")
  }

  # Abort early on named inputs such as `filter(x = 1)`.
  check_filter(...)

  by <- compute_by(
    {{ .by }},
    .data,
    by_arg = ".by",
    data_arg = ".data",
    error_call = caller_env()
  )

  # `.by` is per-operation grouping: install the grouping columns on a working
  # copy so the caller's `.data` is never left carrying the temporary groups.
  grouped <- .data
  if (by$from_by) {
    grouped$lazy_query$group_vars <- by$names
  }

  dots <- partial_eval_dots(grouped, ..., .named = FALSE)

  # With no conditions there is nothing to add to the query. Return the input
  # exactly as it was received; returning the working copy here would leak
  # the temporary `.by` grouping into the result.
  if (is_empty(dots)) {
    return(.data)
  }

  grouped$lazy_query <- add_filter(grouped, dots)

  # Drop the temporary grouping again so the result of a `.by` filter is
  # ungrouped.
  if (by$from_by) {
    grouped$lazy_query$group_vars <- character()
  }
  grouped
}
# Build the lazy query that results from applying the filter conditions
# `dots` to `.data`.
#
# `.data` is a lazy table; its `lazy_query` element is the query being
# extended. `dots` is the list of (already partially evaluated) filter
# expressions; any names on it are dropped.
#
# Returns a lazy query object: either the existing query with extra `having`
# or `where` conditions appended, or a new `lazy_select_query()` wrapping the
# existing one.
add_filter <- function(.data, dots) {
  con <- remote_con(.data)
  lazy_query <- .data$lazy_query
  dots <- unname(dots)

  # Computed once and reused for every branch decision below.
  dots_use_window_fun <- uses_window_fun(dots, con)

  if (filter_can_use_having(lazy_query, dots_use_window_fun)) {
    return(filter_via_having(lazy_query, dots))
  }

  if (dots_use_window_fun) {
    # Do partial evaluation, then extract out window functions
    window_funs <- ls(dbplyr_sql_translation(con)$window)
    where <- translate_window_where_all(dots, window_funs)

    # Add extracted window expressions as columns
    mutated <- mutate(.data, !!!where$comp)

    # And filter with the modified `where` using the new columns, selecting
    # only the variables of the original table so the helper columns do not
    # appear in the result.
    original_vars <- op_vars(.data)
    return(lazy_select_query(
      x = mutated$lazy_query,
      select = syms(set_names(original_vars)),
      where = where$expr
    ))
  }

  # NOTE(review): presumably TRUE when a condition refers to a column that is
  # computed in the current SELECT; such a filter is placed in a new query
  # wrapped around the existing one.
  if (uses_mutated_vars(dots, lazy_query$select)) {
    return(lazy_select_query(
      x = lazy_query,
      where = dots
    ))
  }

  # Otherwise append the conditions to the existing query. Select entries
  # whose expression is a bare symbol different from the output name are
  # passed to `replace_sym()` together with their names, so that (presumably)
  # the conditions end up referring to the underlying symbol instead.
  exprs <- lazy_query$select$expr
  nms <- lazy_query$select$name
  projection <- purrr::map2_lgl(
    exprs,
    nms,
    ~ is_symbol(.x) && !identical(.x, sym(.y))
  )
  if (any(projection)) {
    dots <- purrr::map(dots, replace_sym, nms[projection], exprs[projection])
  }

  lazy_query$where <- c(lazy_query$where, dots)
  lazy_query
}
# Decide whether filter conditions can be attached to the existing query as a
# `HAVING` clause.
#
# `lazy_query` is the query being filtered; `dots_use_window_fun` is a logical
# flag saying whether the filter conditions use a window function.
#
# Rationale, from the Postgres documentation
# (https://www.postgresql.org/docs/current/sql-select.html#SQL-HAVING):
# every column referenced in a HAVING condition must unambiguously reference
# a grouping column, unless the reference appears within an aggregate
# function or the ungrouped column is functionally dependent on the grouping
# columns.
#
# After `summarise()` every column is either a grouping column or an
# aggregated column (not true for data frames, but valid for SQL tables).
# Hence a filter without window functions only touches grouping or aggregated
# columns.
filter_can_use_having <- function(lazy_query, dots_use_window_fun) {
  if (dots_use_window_fun) {
    # Window functions rule out HAVING.
    FALSE
  } else if (!inherits(lazy_query, "lazy_select_query")) {
    # Only select queries record the operation that produced them.
    FALSE
  } else {
    lazy_query$select_operation == "summarise"
  }
}
# Append the filter conditions `dots` to the `having` element of
# `lazy_query` and return the updated query.
#
# Each condition is first passed through `replace_sym()` together with the
# names and expressions of the current select. NOTE(review): presumably this
# swaps references to output columns for the expressions that define them;
# confirm against `replace_sym()`.
filter_via_having <- function(lazy_query, dots) {
  select <- lazy_query$select
  rewritten <- purrr::map(
    dots,
    function(dot) replace_sym(dot, select$name, select$expr)
  )

  lazy_query$having <- c(lazy_query$having, rewritten)
  lazy_query
}
# Validate the inputs of `filter()`: abort if any of them is named.
#
# `...` are the filter conditions as supplied by the user. If at least one
# input carries a name, an error describing the first such input is raised,
# attributed to the caller of this function. Otherwise nothing happens and
# `NULL` is returned invisibly.
check_filter <- function(...) {
  dots <- enquos(...)
  named_at <- which(have_name(dots))

  if (length(named_at) == 0) {
    return(invisible(NULL))
  }

  # Only the first named input is reported, since aborting stops evaluation.
  i <- named_at[[1]]

  # Unlike in `dplyr`, named logical vectors make no sense here, so they are
  # rejected as well.
  expr <- quo_get_expr(dots[[i]])

  cli_abort(
    c(
      "Problem with {.fun filter} input `..{i}`.",
      x = "Input `..{i}` is named.",
      i = "This usually means that you've used {.code =} instead of {.code ==}.",
      i = "Did you mean `{names(dots)[i]} == {as_label(expr)}`?"
    ),
    call = caller_env()
  )
}
|