File: verb-arrange.R

package info (click to toggle)
r-cran-dbplyr 2.3.0%2Bdfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 2,376 kB
  • sloc: sh: 13; makefile: 2
file content (71 lines) | stat: -rw-r--r-- 2,116 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
#' Arrange rows by column values
#'
#' @description
#' This is an method for the dplyr [arrange()] generic. It generates
#' the `ORDER BY` clause of the SQL query. It also affects the
#' [window_order()] of windowed expressions in [mutate.tbl_lazy()].
#'
#' Note that `ORDER BY` clauses can not generally appear in subqueries, which
#' means that you should `arrange()` as late as possible in your pipelines.
#'
#' @section Missing values:
#' Unlike R, most databases sorts `NA` (`NULL`s) at the front. You can
#' can override this behaviour by explicitly sorting on `is.na(x)`.
#'
#' @param .data A lazy data frame backed by a database query.
#' @inheritParams dplyr::arrange
#' @return Another `tbl_lazy`. Use [show_query()] to see the generated
#'   query, and use [`collect()`][collect.tbl_sql] to execute the query
#'   and return data to R.
#' @examples
#' library(dplyr, warn.conflicts = FALSE)
#'
#' db <- memdb_frame(a = c(3, 4, 1, 2), b = c(5, 1, 2, NA))
#' db %>% arrange(a) %>% show_query()
#'
#' # Note that NAs are sorted first
#' db %>% arrange(b)
#' # override by sorting on is.na() first
#' db %>% arrange(is.na(b), b)
#' @export
#' @importFrom dplyr arrange
arrange.tbl_lazy <- function(.data, ..., .by_group = FALSE) {
  dots <- partial_eval_dots(.data, ..., .named = FALSE)
  names(dots) <- NULL

  .data$lazy_query <- add_arrange(.data, dots, .by_group)
  .data
}

add_arrange <- function(.data, dots, .by_group) {
  lazy_query <- .data$lazy_query

  if (.by_group) {
    dots <- c(syms(op_grps(lazy_query)), dots)
  }
  if (identical(dots, lazy_query$order_vars)) {
    return(lazy_query)
  }

  # `dots` must be an empty list so that `arrange()` removes the `order_vars`
  dots <- dots %||% list()

  new_lazy_query <- lazy_select_query(
    x = lazy_query,
    order_by = dots,
    order_vars = dots
  )

  if (!inherits(lazy_query, "lazy_select_query")) {
    return(new_lazy_query)
  }

  # Needed because `ORDER BY` is evaluated before `LIMIT`
  if (!is.null(lazy_query$limit)) {
    return(new_lazy_query)
  }

  lazy_query$order_vars <- dots
  lazy_query$order_by <- dots
  lazy_query
}