1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157
|
#' Require usage of nzchar where appropriate
#'
#' [nzchar()] efficiently determines which of a vector of strings are empty
#' (i.e., are `""`). It should in most cases be used instead of
#' constructions like `string == ""` or `nchar(string) == 0`.
#'
#' One crucial difference is in the default handling of `NA_character_`, i.e.,
#' missing strings. `nzchar(NA_character_)` is `TRUE`, while `NA_character_ == ""`
#' and `nchar(NA_character_) == 0` are both `NA`. Therefore, for strict
#' compatibility, use `nzchar(x, keepNA = TRUE)`. If the input is known to be
#' complete (no missing entries), this argument can be dropped for conciseness.
#'
#' @examples
#' # will produce lints
#' lint(
#' text = "x[x == '']",
#' linters = nzchar_linter()
#' )
#'
#' lint(
#' text = "x[nchar(x) > 0]",
#' linters = nzchar_linter()
#' )
#'
#' # okay
#' lint(
#' text = "x[!nzchar(x, keepNA = TRUE)]",
#' linters = nzchar_linter()
#' )
#'
#' lint(
#' text = "x[nzchar(x, keepNA = TRUE)]",
#' linters = nzchar_linter()
#' )
#'
#' @evalRd rd_tags("nzchar_linter")
#' @seealso [linters] for a complete list of linters available in lintr.
#' @export
nzchar_linter <- function() {
comparator_nodes <- infix_metadata$xml_tag[infix_metadata$comparator]
# use string-length to capture both "" and ''
# if (any(x == "")) is not treated like it's part of if(), but
# any(if (x == "") y else z) _is_ treated so. this condition looks for the
# expr to be inside a call that's _not_ above an IF/WHILE.
comparison_xpath <- glue("
//STR_CONST[string-length(text()) = 2]
/parent::expr
/parent::expr[
({ xp_or(comparator_nodes) })
and (
not(ancestor-or-self::expr[
preceding-sibling::IF
or preceding-sibling::WHILE
])
or ancestor-or-self::expr[
(
preceding-sibling::expr/SYMBOL_FUNCTION_CALL
or preceding-sibling::OP-LEFT-BRACKET
) and not(
descendant-or-self::expr[IF or WHILE]
)
]
)
]
")
comparison_msg_map <- c(
GT = 'Use nzchar(x) instead of x > "". ',
NE = 'Use nzchar(x) instead of x != "". ',
LE = 'Use !nzchar(x) instead of x <= "". ',
EQ = 'Use !nzchar(x) instead of x == "". ',
GE = 'x >= "" is always true, maybe you want nzchar(x)? ',
LT = 'x < "" is always false, maybe you want !nzchar(x)? '
)
# nchar(., type="width") not strictly compatible with nzchar
# unsure allowNA compatible, so allow it just in case (see TODO in tests)
nchar_xpath <- glue("
parent::expr
/parent::expr[
({ xp_or(comparator_nodes) })
and not(expr/SYMBOL_SUB[
(
text() = 'type'
and following-sibling::expr[1]/STR_CONST[contains(text(), 'width')]
) or (
text() = 'allowNA'
and following-sibling::expr[1]/NUM_CONST[text() = 'TRUE']
)
])
and expr/NUM_CONST[text() = '0' or text() = '0L' or text() = '0.0']
]
")
nchar_msg_map <- c(
GT = "Use nzchar(x) instead of nchar(x) > 0. ",
NE = "Use nzchar(x) instead of nchar(x) != 0. ",
LE = "Use !nzchar(x) instead of nchar(x) <= 0. ",
EQ = "Use !nzchar(x) instead of nchar(x) == 0. ",
GE = "nchar(x) >= 0 is always true, maybe you want nzchar(x)? ",
LT = "nchar(x) < 0 is always false, maybe you want !nzchar(x)? "
)
keepna_note <- paste(
"Whenever missing data is possible,",
"please take care to use nzchar(., keepNA = TRUE);",
"nzchar(NA) is TRUE by default."
)
# For ordered operators like '>', we need to give the message for
# its "opposite" (not inverse) if the bad usage is on the RHS,
# e.g. 0 < nchar(x) has to be treated as nchar(x) > 0.
op_for_msg <- function(expr, const) {
op <- xml_name(xml_find_first(expr, "*[2]"))
maybe_needs_flip <- !is.na(xml_find_first(expr, sprintf("*[1][%s]", const)))
ordered_ops <- c("GT", "GE", "LE", "LT")
ordered_idx <- match(op, ordered_ops)
needs_flip <- maybe_needs_flip & !is.na(ordered_idx)
# un-benchmarked, but should be faster (though less readable) as
# > ordered_ops[5L - ordered_idx[needs_flip]]
op[needs_flip] <- rev(ordered_ops)[ordered_idx[needs_flip]]
op
}
Linter(linter_level = "expression", function(source_expression) {
xml <- source_expression$xml_parsed_content
comparison_expr <- xml_find_all(xml, comparison_xpath)
comparison_op <- op_for_msg(comparison_expr, const = "STR_CONST")
comparison_lints <- xml_nodes_to_lints(
comparison_expr,
source_expression = source_expression,
lint_message = paste0(
comparison_msg_map[comparison_op],
"Note that unlike nzchar(), ", comparison_op, " coerces to character, ",
"so you'll have to use as.character() if x is a factor. ",
keepna_note
),
type = "warning"
)
xml_calls <- source_expression$xml_find_function_calls("nchar")
nchar_expr <- xml_find_all(xml_calls, nchar_xpath)
nchar_op <- op_for_msg(nchar_expr, const = "NUM_CONST")
nchar_lints <- xml_nodes_to_lints(
nchar_expr,
source_expression = source_expression,
lint_message = paste0(nchar_msg_map[nchar_op], keepna_note),
type = "warning"
)
c(comparison_lints, nchar_lints)
})
}
|