File: nzchar_linter.R

package info (click to toggle)
r-cran-lintr 3.2.0-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 4,396 kB
  • sloc: sh: 13; xml: 10; makefile: 2
file content (157 lines) | stat: -rw-r--r-- 5,313 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
#' Require usage of nzchar where appropriate
#'
#' [nzchar()] efficiently determines which of a vector of strings are empty
#'   (i.e., are `""`). It should in most cases be used instead of
#'   constructions like `string == ""` or `nchar(string) == 0`.
#'
#' One crucial difference is in the default handling of `NA_character_`, i.e.,
#'   missing strings. `nzchar(NA_character_)` is `TRUE`, while `NA_character_ == ""`
#'   and `nchar(NA_character_) == 0` are both `NA`. Therefore, for strict
#'   compatibility, use `nzchar(x, keepNA = TRUE)`. If the input is known to be
#'   complete (no missing entries), this argument can be dropped for conciseness.
#'
#' @examples
#' # will produce lints
#' lint(
#'   text = "x[x == '']",
#'   linters = nzchar_linter()
#' )
#'
#' lint(
#'   text = "x[nchar(x) > 0]",
#'   linters = nzchar_linter()
#' )
#'
#' # okay
#' lint(
#'   text = "x[!nzchar(x, keepNA = TRUE)]",
#'   linters = nzchar_linter()
#' )
#'
#' lint(
#'   text = "x[nzchar(x, keepNA = TRUE)]",
#'   linters = nzchar_linter()
#' )
#'
#' @evalRd rd_tags("nzchar_linter")
#' @seealso [linters] for a complete list of linters available in lintr.
#' @export
nzchar_linter <- function() {
  comparator_nodes <- infix_metadata$xml_tag[infix_metadata$comparator]

  # use string-length to capture both "" and ''
  # if (any(x == "")) is not treated like it's part of if(), but
  #   any(if (x == "") y else z) _is_ treated so. this condition looks for the
  #   expr to be inside a call that's _not_ above an IF/WHILE.
  comparison_xpath <- glue("
  //STR_CONST[string-length(text()) = 2]
    /parent::expr
    /parent::expr[
      ({ xp_or(comparator_nodes) })
      and (
        not(ancestor-or-self::expr[
          preceding-sibling::IF
          or preceding-sibling::WHILE
        ])
        or ancestor-or-self::expr[
          (
            preceding-sibling::expr/SYMBOL_FUNCTION_CALL
            or preceding-sibling::OP-LEFT-BRACKET
          ) and not(
            descendant-or-self::expr[IF or WHILE]
          )
        ]
      )
    ]
  ")

  comparison_msg_map <- c(
    GT = 'Use nzchar(x) instead of x > "". ',
    NE = 'Use nzchar(x) instead of x != "". ',
    LE = 'Use !nzchar(x) instead of x <= "". ',
    EQ = 'Use !nzchar(x) instead of x == "". ',
    GE = 'x >= "" is always true, maybe you want nzchar(x)? ',
    LT = 'x < "" is always false, maybe you want !nzchar(x)? '
  )

  # nchar(., type="width") not strictly compatible with nzchar
  # unsure allowNA compatible, so allow it just in case (see TODO in tests)
  nchar_xpath <- glue("
  parent::expr
    /parent::expr[
      ({ xp_or(comparator_nodes) })
      and not(expr/SYMBOL_SUB[
        (
          text() = 'type'
          and following-sibling::expr[1]/STR_CONST[contains(text(), 'width')]
        ) or (
          text() = 'allowNA'
          and following-sibling::expr[1]/NUM_CONST[text() = 'TRUE']
        )
      ])
      and expr/NUM_CONST[text() = '0' or text() = '0L' or text() = '0.0']
    ]
  ")

  nchar_msg_map <- c(
    GT = "Use nzchar(x) instead of nchar(x) > 0. ",
    NE = "Use nzchar(x) instead of nchar(x) != 0. ",
    LE = "Use !nzchar(x) instead of nchar(x) <= 0. ",
    EQ = "Use !nzchar(x) instead of nchar(x) == 0. ",
    GE = "nchar(x) >= 0 is always true, maybe you want nzchar(x)? ",
    LT = "nchar(x) < 0 is always false, maybe you want !nzchar(x)? "
  )

  keepna_note <- paste(
    "Whenever missing data is possible,",
    "please take care to use nzchar(., keepNA = TRUE);",
    "nzchar(NA) is TRUE by default."
  )

  # For ordered operators like '>', we need to give the message for
  #   its "opposite" (not inverse) if the bad usage is on the RHS,
  #   e.g. 0 < nchar(x) has to be treated as nchar(x) > 0.
  op_for_msg <- function(expr, const) {
    op <- xml_name(xml_find_first(expr, "*[2]"))
    maybe_needs_flip <- !is.na(xml_find_first(expr, sprintf("*[1][%s]", const)))

    ordered_ops <- c("GT", "GE", "LE", "LT")
    ordered_idx <- match(op, ordered_ops)

    needs_flip <- maybe_needs_flip & !is.na(ordered_idx)
    # un-benchmarked, but should be faster (though less readable) as
    # > ordered_ops[5L - ordered_idx[needs_flip]]
    op[needs_flip] <- rev(ordered_ops)[ordered_idx[needs_flip]]
    op
  }

  Linter(linter_level = "expression", function(source_expression) {
    xml <- source_expression$xml_parsed_content

    comparison_expr <- xml_find_all(xml, comparison_xpath)
    comparison_op <- op_for_msg(comparison_expr, const = "STR_CONST")
    comparison_lints <- xml_nodes_to_lints(
      comparison_expr,
      source_expression = source_expression,
      lint_message = paste0(
        comparison_msg_map[comparison_op],
        "Note that unlike nzchar(), ", comparison_op, " coerces to character, ",
        "so you'll have to use as.character() if x is a factor. ",
        keepna_note
      ),
      type = "warning"
    )

    xml_calls <- source_expression$xml_find_function_calls("nchar")
    nchar_expr <- xml_find_all(xml_calls, nchar_xpath)
    nchar_op <- op_for_msg(nchar_expr, const = "NUM_CONST")
    nchar_lints <- xml_nodes_to_lints(
      nchar_expr,
      source_expression = source_expression,
      lint_message = paste0(nchar_msg_map[nchar_op], keepna_note),
      type = "warning"
    )

    c(comparison_lints, nchar_lints)
  })
}