1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182
|
test_that("string_boundary_linter skips allowed grepl() usages", {
linter <- string_boundary_linter()
# no known start/end anchor --> no lint
expect_lint("grepl(p1, x)", NULL, linter)
# no start/end anchor --> no lint
expect_lint("grepl('abc', x)", NULL, linter)
# regex pattern --> no lint
expect_lint("grepl('^[a-z]', x)", NULL, linter)
expect_lint("grepl('[a-z]$', x)", NULL, linter)
# ignore.case --> no lint
expect_lint("grepl('^abc', x, ignore.case = TRUE)", NULL, linter)
expect_lint("grepl('^abc', x, ignore.case = ignore.case)", NULL, linter)
# fixed --> no lint
expect_lint("grepl('^abc', x, fixed = TRUE)", NULL, linter)
expect_lint("grepl('^abc', x, fixed = fixed)", NULL, linter)
})
test_that("string_boundary_linter skips allowed str_detect() usages", {
linter <- string_boundary_linter()
# no known start/end anchor --> no lint
expect_lint("str_detect(x, p1)", NULL, linter)
# no start/end anchor --> no lint
expect_lint("str_detect(x, 'abc')", NULL, linter)
# regex pattern --> no lint
expect_lint("str_detect(x, '^[a-z]')", NULL, linter)
expect_lint("str_detect(x, '[a-z]$')", NULL, linter)
})
test_that("string_boundary_linter skips allowed substr()/substring() usages", {
linter <- string_boundary_linter()
# no comparison operator --> no lint
expect_lint("substr(x, start, end)", NULL, linter)
# unknown indices --> no lint
expect_lint("substr(x, start, end) == 'a'", NULL, linter)
expect_lint("substring(x, start, end) == 'a'", NULL, linter)
# using foo(nchar(.))
expect_lint("substring(x, nchar(x) - 4, nchar(x) - 1) == 'abc'", NULL, linter)
# using nchar(), but not of the input
expect_lint("substring(x, nchar(y) - 4, nchar(y)) == 'abcd'", NULL, linter)
# using x in nchar(), but on foo(input)
expect_lint("substring(x, nchar(foo(x)) - 4, nchar(foo(x))) == 'abcd'", NULL, linter)
# _close_ to equivalent, but not so in general -- e.g.
# substring(s <- "abcdefg", 2L) == "efg" is not TRUE, but endsWith(s, "efg")
# is. And if `s` contains strings of varying lengths, there's no equivalent.
expect_lint("substring(x, 2L)", NULL, linter)
})
test_that("string_boundary_linter blocks simple disallowed grepl() usages", {
linter <- string_boundary_linter()
starts_message <- rex::rex("Use !is.na(x) & startsWith(x, string) to detect a fixed initial substring,")
ends_message <- rex::rex("Use !is.na(x) & endsWith(x, string) to detect a fixed terminal substring,")
expect_lint("grepl('^a', x)", starts_message, linter)
# non-trivially equivalent (but still same as startsWith())
expect_lint("grepl('^[.]', x)", starts_message, linter)
expect_lint("grepl('a$', x)", ends_message, linter)
# also get negation for free
expect_lint("!grepl('a$', x)", ends_message, linter)
# perl = TRUE doesn't matter
expect_lint("grepl('^a', x, perl = TRUE)", starts_message, linter)
# explicit FALSE (i.e., an explicit default) is ignored
expect_lint("grepl('^a', x, fixed = FALSE)", starts_message, linter)
expect_lint("grepl('^a', x, fixed = F)", starts_message, linter)
})
test_that("string_boundary_linter blocks simple disallowed str_detect() usages", {
linter <- string_boundary_linter()
expect_lint(
"str_detect(x, '^a')",
rex::rex("Use startsWith() to detect a fixed initial substring."),
linter
)
expect_lint(
"str_detect(x, 'a$')",
rex::rex("Use endsWith() to detect a fixed terminal substring."),
linter
)
})
test_that("string_boundary_linter blocks disallowed substr()/substring() usage", {
linter <- string_boundary_linter()
starts_message <- rex::rex("Use startsWith() to detect an initial substring.")
ends_message <- rex::rex("Use endsWith() to detect a terminal substring.")
expect_lint("substr(x, 1L, 2L) == 'ab'", starts_message, linter)
# end doesn't matter, just anchoring to 1L
expect_lint("substr(x, 1L, end) == 'ab'", starts_message, linter)
expect_lint("substring(x, nchar(x) - 4L, nchar(x)) == 'abcde'", ends_message, linter)
# start doesn't matter, just anchoring to nchar(x)
expect_lint("substring(x, start, nchar(x)) == 'abcde'", ends_message, linter)
# more complicated expressions
expect_lint("substring(colnames(x), start, nchar(colnames(x))) == 'abc'", ends_message, linter)
})
test_that("plain ^ or $ are skipped", {
linter <- string_boundary_linter()
expect_lint('grepl("^", x)', NULL, linter)
expect_lint('grepl("$", x)', NULL, linter)
})
test_that("substr inverted tests are caught as well", {
linter <- string_boundary_linter()
expect_lint(
"substr(x, 1L, 2L) != 'ab'",
rex::rex("Use startsWith() to detect an initial substring."),
linter
)
expect_lint(
"substring(x, nchar(x) - 4L, nchar(x)) != 'abcde'",
rex::rex("Use endsWith() to detect a terminal substring."),
linter
)
})
test_that("R>=4 raw strings are detected", {
linter <- string_boundary_linter()
skip_if_not_r_version("4.0.0")
expect_lint('grepl(R"(^.{3})", x)', NULL, linter)
expect_lint(
'grepl(R"(^abc)", x)',
rex::rex("Use !is.na(x) & startsWith(x, string) to detect a fixed initial substring,"),
linter
)
})
test_that("grepl() can optionally be ignored", {
linter <- string_boundary_linter(allow_grepl = TRUE)
expect_lint("grepl('^abc', x)", NULL, linter)
expect_lint("grepl('xyz$', x)", NULL, linter)
})
test_that("whole-string regex recommends ==, not {starts,ends}With()", {
linter <- string_boundary_linter()
lint_msg <- rex::rex("Use == to check for an exact string match.")
expect_lint("grepl('^abc$', x)", lint_msg, linter)
expect_lint("grepl('^a\\\\.b$', x)", lint_msg, linter)
expect_lint("str_detect(x, '^abc$')", lint_msg, linter)
expect_lint("str_detect(x, '^a[.]b$')", lint_msg, linter)
})
test_that("vectorization + metadata work as intended", {
expect_lint(
trim_some("{
substring(a, 1, 3) == 'abc'
substring(b, nchar(b) - 3, nchar(b)) == 'defg'
substr(c, 1, 3) == 'hij'
substr(d, nchar(d) - 3, nchar(d)) == 'klmn'
grepl('^abc', e)
grepl('abc$', f)
grepl('^abc$', g)
str_detect(h, '^abc')
str_detect(i, 'abc$')
str_detect(j, '^abc$')
}"),
list(
list("startsWith", line_number = 2L),
list("endsWith", line_number = 3L),
list("startsWith", line_number = 4L),
list("endsWith", line_number = 5L),
list("startsWith", line_number = 6L),
list("endsWith", line_number = 7L),
list("==", line_number = 8L),
list("startsWith", line_number = 9L),
list("endsWith", line_number = 10L),
list("==", line_number = 11L)
),
string_boundary_linter()
)
})
|