File: test-string_boundary_linter.R

package info (click to toggle)
r-cran-lintr 3.2.0-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 4,396 kB
  • sloc: sh: 13; xml: 10; makefile: 2
file content (182 lines) | stat: -rw-r--r-- 6,668 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
test_that("string_boundary_linter skips allowed grepl() usages", {
  linter <- string_boundary_linter()

  # no known start/end anchor --> no lint
  expect_lint("grepl(p1, x)", NULL, linter)
  # no start/end anchor --> no lint
  expect_lint("grepl('abc', x)", NULL, linter)
  # regex pattern --> no lint
  expect_lint("grepl('^[a-z]', x)", NULL, linter)
  expect_lint("grepl('[a-z]$', x)", NULL, linter)

  # ignore.case --> no lint
  expect_lint("grepl('^abc', x, ignore.case = TRUE)", NULL, linter)
  expect_lint("grepl('^abc', x, ignore.case = ignore.case)", NULL, linter)

  # fixed --> no lint
  expect_lint("grepl('^abc', x, fixed = TRUE)", NULL, linter)
  expect_lint("grepl('^abc', x, fixed = fixed)", NULL, linter)
})

test_that("string_boundary_linter skips allowed str_detect() usages", {
  linter <- string_boundary_linter()

  # no known start/end anchor --> no lint
  expect_lint("str_detect(x, p1)", NULL, linter)
  # no start/end anchor --> no lint
  expect_lint("str_detect(x, 'abc')", NULL, linter)
  # regex pattern --> no lint
  expect_lint("str_detect(x, '^[a-z]')", NULL, linter)
  expect_lint("str_detect(x, '[a-z]$')", NULL, linter)
})

test_that("string_boundary_linter skips allowed substr()/substring() usages", {
  linter <- string_boundary_linter()

  # no comparison operator --> no lint
  expect_lint("substr(x, start, end)", NULL, linter)
  # unknown indices --> no lint
  expect_lint("substr(x, start, end) == 'a'", NULL, linter)
  expect_lint("substring(x, start, end) == 'a'", NULL, linter)
  # using foo(nchar(.))
  expect_lint("substring(x, nchar(x) - 4, nchar(x) - 1) == 'abc'", NULL, linter)
  # using nchar(), but not of the input
  expect_lint("substring(x, nchar(y) - 4, nchar(y)) == 'abcd'", NULL, linter)
  # using x in nchar(), but on foo(input)
  expect_lint("substring(x, nchar(foo(x)) - 4, nchar(foo(x))) == 'abcd'", NULL, linter)

  # _close_ to equivalent, but not so in general -- e.g.
  #   substring(s <- "abcdefg", 2L) == "efg" is not TRUE, but endsWith(s, "efg")
  #   is. And if `s` contains strings of varying lengths, there's no equivalent.
  expect_lint("substring(x, 2L)", NULL, linter)
})

test_that("string_boundary_linter blocks simple disallowed grepl() usages", {
  linter <- string_boundary_linter()
  starts_message <- rex::rex("Use !is.na(x) & startsWith(x, string) to detect a fixed initial substring,")
  ends_message <- rex::rex("Use !is.na(x) & endsWith(x, string) to detect a fixed terminal substring,")

  expect_lint("grepl('^a', x)", starts_message, linter)
  # non-trivially equivalent (but still same as startsWith())
  expect_lint("grepl('^[.]', x)", starts_message, linter)
  expect_lint("grepl('a$', x)", ends_message, linter)
  # also get negation for free
  expect_lint("!grepl('a$', x)", ends_message, linter)

  # perl = TRUE doesn't matter
  expect_lint("grepl('^a', x, perl = TRUE)", starts_message, linter)
  # explicit FALSE (i.e., an explicit default) is ignored
  expect_lint("grepl('^a', x, fixed = FALSE)", starts_message, linter)
  expect_lint("grepl('^a', x, fixed = F)", starts_message, linter)
})

test_that("string_boundary_linter blocks simple disallowed str_detect() usages", {
  linter <- string_boundary_linter()

  expect_lint(
    "str_detect(x, '^a')",
    rex::rex("Use startsWith() to detect a fixed initial substring."),
    linter
  )
  expect_lint(
    "str_detect(x, 'a$')",
    rex::rex("Use endsWith() to detect a fixed terminal substring."),
    linter
  )
})

test_that("string_boundary_linter blocks disallowed substr()/substring() usage", {
  linter <- string_boundary_linter()
  starts_message <- rex::rex("Use startsWith() to detect an initial substring.")
  ends_message <- rex::rex("Use endsWith() to detect a terminal substring.")

  expect_lint("substr(x, 1L, 2L) == 'ab'", starts_message, linter)
  # end doesn't matter, just anchoring to 1L
  expect_lint("substr(x, 1L, end) == 'ab'", starts_message, linter)
  expect_lint("substring(x, nchar(x) - 4L, nchar(x)) == 'abcde'", ends_message, linter)
  # start doesn't matter, just anchoring to nchar(x)
  expect_lint("substring(x, start, nchar(x)) == 'abcde'", ends_message, linter)
  # more complicated expressions
  expect_lint("substring(colnames(x), start, nchar(colnames(x))) == 'abc'", ends_message, linter)
})

test_that("plain ^ or $ are skipped", {
  linter <- string_boundary_linter()

  expect_lint('grepl("^", x)', NULL, linter)
  expect_lint('grepl("$", x)', NULL, linter)
})

test_that("substr inverted tests are caught as well", {
  linter <- string_boundary_linter()

  expect_lint(
    "substr(x, 1L, 2L) != 'ab'",
    rex::rex("Use startsWith() to detect an initial substring."),
    linter
  )
  expect_lint(
    "substring(x, nchar(x) - 4L, nchar(x)) != 'abcde'",
    rex::rex("Use endsWith() to detect a terminal substring."),
    linter
  )
})

test_that("R>=4 raw strings are detected", {
  linter <- string_boundary_linter()

  skip_if_not_r_version("4.0.0")
  expect_lint('grepl(R"(^.{3})", x)', NULL, linter)
  expect_lint(
    'grepl(R"(^abc)", x)',
    rex::rex("Use !is.na(x) & startsWith(x, string) to detect a fixed initial substring,"),
    linter
  )
})

test_that("grepl() can optionally be ignored", {
  linter <- string_boundary_linter(allow_grepl = TRUE)

  expect_lint("grepl('^abc', x)", NULL, linter)
  expect_lint("grepl('xyz$', x)", NULL, linter)
})

test_that("whole-string regex recommends ==, not {starts,ends}With()", {
  linter <- string_boundary_linter()
  lint_msg <- rex::rex("Use == to check for an exact string match.")

  expect_lint("grepl('^abc$', x)", lint_msg, linter)
  expect_lint("grepl('^a\\\\.b$', x)", lint_msg, linter)
  expect_lint("str_detect(x, '^abc$')", lint_msg, linter)
  expect_lint("str_detect(x, '^a[.]b$')", lint_msg, linter)
})

test_that("vectorization + metadata work as intended", {
  expect_lint(
    trim_some("{
      substring(a, 1, 3) == 'abc'
      substring(b, nchar(b) - 3, nchar(b)) == 'defg'
      substr(c, 1, 3) == 'hij'
      substr(d, nchar(d) - 3, nchar(d)) == 'klmn'
      grepl('^abc', e)
      grepl('abc$', f)
      grepl('^abc$', g)
      str_detect(h, '^abc')
      str_detect(i, 'abc$')
      str_detect(j, '^abc$')
    }"),
    list(
      list("startsWith", line_number = 2L),
      list("endsWith", line_number = 3L),
      list("startsWith", line_number = 4L),
      list("endsWith", line_number = 5L),
      list("startsWith", line_number = 6L),
      list("endsWith", line_number = 7L),
      list("==", line_number = 8L),
      list("startsWith", line_number = 9L),
      list("endsWith", line_number = 10L),
      list("==", line_number = 11L)
    ),
    string_boundary_linter()
  )
})