File: inner_combine_linter.R

package info (click to toggle)
r-cran-lintr 3.2.0-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 4,396 kB
  • sloc: sh: 13; xml: 10; makefile: 2
file content (132 lines) | stat: -rw-r--r-- 4,882 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
#' Require `c()` to be applied before relatively expensive vectorized functions
#'
#' `as.Date(c(a, b))` is logically equivalent to `c(as.Date(a), as.Date(b))`.
#'   The same equivalence holds for several other vectorized functions like
#'   [as.POSIXct()] and math functions like [sin()]. The former is to be
#'   preferred so that the most expensive part of the operation ([as.Date()])
#'   is applied only once.
#'
#' Note that [strptime()] has one idiosyncrasy to be aware of, namely that
#'   auto-detected `format=` is set by the first matching input, which means
#'   that a case like `c(as.POSIXct("2024-01-01"), as.POSIXct("2024-01-01 01:02:03"))`
#'   gives different results to `as.POSIXct(c("2024-01-01", "2024-01-01 01:02:03"))`.
#'   This false positive is rare; a workaround where possible is to use
#'   consistent formatting, i.e., `"2024-01-01 00:00:00"` in the example.
#'
#' @examples
#' # will produce lints
#' lint(
#'   text = "c(log10(x), log10(y), log10(z))",
#'   linters = inner_combine_linter()
#' )
#'
#' # okay
#' lint(
#'   text = "log10(c(x, y, z))",
#'   linters = inner_combine_linter()
#' )
#'
#' lint(
#'   text = "c(log(x, base = 10), log10(x, base = 2))",
#'   linters = inner_combine_linter()
#' )
#'
#' @evalRd rd_tags("inner_combine_linter")
#' @seealso [linters] for a complete list of linters available in lintr.
#' @export
inner_combine_linter <- function() {
  # these don't take any other arguments (except maybe by non-default
  #   methods), so don't need to check equality of other arguments
  no_arg_vectorized_funs <- c(
    "sin", "cos", "tan", "sinpi", "cospi", "tanpi", "asin", "acos", "atan",
    "log2", "log10", "log1p", "exp", "expm1",
    "sqrt", "abs"
  )

  # TODO(#2468): Try and make this XPath less brittle/more extensible.

  # See ?as.Date, ?as.POSIXct. tryFormats is not explicitly in any default
  #   POSIXct method, but it is in as.Date.character and as.POSIXlt.character --
  #   the latter is what actually gets invoked when running as.POSIXct
  #   on a character. So it is indeed an argument by pass-through.
  date_args <- c("format", "origin", "tz", "tryFormats")
  date_funs <- c("as.Date", "as.POSIXct", "as.POSIXlt")

  # See ?log. Only these two take a 'base' argument.
  log_funs <- c("log", "logb")
  log_args <- "base"

  # See ?lubridate::ymd and ?lubridate::ymd_hms
  lubridate_args <- c("quiet", "tz", "locale", "truncated")
  lubridate_funs <- c(
    "ymd", "ydm", "mdy", "myd", "dmy", "dym",
    "yq", "ym", "my",
    "ymd_hms", "ymd_hm", "ymd_h", "dmy_hms", "dmy_hm", "dmy_h",
    "mdy_hms", "mdy_hm", "mdy_h", "ydm_hms", "ydm_hm", "ydm_h"
  )

  date_args_cond <- build_arg_condition(date_funs, date_args)
  log_args_cond <- build_arg_condition(log_funs, log_args)
  lubridate_args_cond <- build_arg_condition(lubridate_funs, lubridate_args)

  c_expr_cond <- xp_and(
    sprintf(
      "expr[1][SYMBOL_FUNCTION_CALL[%s]]",
      xp_text_in_table(c(no_arg_vectorized_funs, date_funs, log_funs, lubridate_funs))
    ),
    "not(following-sibling::expr[not(expr[1][SYMBOL_FUNCTION_CALL])])",
    "not(expr[1]/SYMBOL_FUNCTION_CALL != following-sibling::expr/expr[1]/SYMBOL_FUNCTION_CALL)",
    date_args_cond,
    log_args_cond,
    lubridate_args_cond
  )
  xpath <- glue("
  self::*[count(following-sibling::expr) > 1]
    /following-sibling::expr[1][ {c_expr_cond} ]
    /parent::expr
  ")

  Linter(linter_level = "expression", function(source_expression) {
    xml_calls <- source_expression$xml_find_function_calls("c")
    bad_expr <- xml_find_all(xml_calls, xpath)

    matched_call <- xp_call_name(bad_expr, depth = 2L)
    lint_message <- paste(
      "Combine inputs to vectorized functions first to take full advantage of vectorization, e.g.,",
      sprintf(
        "%1$s(c(x, y)) only runs the more expensive %1$s() once as compared to c(%1$s(x), %1$s(y)).",
        matched_call
      )
    )
    xml_nodes_to_lints(bad_expr, source_expression = source_expression, lint_message, type = "warning")
  })
}

#' Make the XPath condition ensuring an argument matches across calls
#'
#' @param arg Character scalar naming an argument
#' @noRd
arg_match_condition <- function(arg) {
  this_symbol <- sprintf("SYMBOL_SUB[text() = '%s']", arg)
  following_symbol <- sprintf("following-sibling::expr/%s", this_symbol)
  next_expr <- "following-sibling::expr[1]"
  xp_or(
    sprintf("not(%s) and not(%s)", this_symbol, following_symbol),
    xp_and(
      this_symbol,
      following_symbol,
      sprintf(
        "not(%1$s/%3$s != %2$s/%3$s)",
        this_symbol, following_symbol, next_expr
      )
    )
  )
}

build_arg_condition <- function(calls, arguments) {
  xp_or(
    sprintf("not(expr[1][SYMBOL_FUNCTION_CALL[%s]])", xp_text_in_table(calls)),
    "not(EQ_SUB) and not(following-sibling::expr/EQ_SUB)",
    xp_and(vapply(arguments, arg_match_condition, character(1L)))
  )
}