File: scanner_spec.rb

package info (click to toggle)
ruby-re2 2.7.0-3
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 2,872 kB
  • sloc: ruby: 1,902; cpp: 1,165; makefile: 7
file content (275 lines) | stat: -rw-r--r-- 7,654 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
# encoding: utf-8

RSpec.describe RE2::Scanner do
  # Scanner#regexp: the scanner must hand back the very RE2::Regexp object
  # that produced it (`equal` asserts object identity, not just equality).
  describe "#regexp" do
    it "returns the original pattern for the scanner" do
      pattern = RE2::Regexp.new('(\w+)')

      expect(pattern.scan("It is a truth").regexp).to equal(pattern)
    end
  end

  # Scanner#string: the scanner must expose the same String object it was
  # asked to scan (`equal` asserts object identity, not just equal content).
  describe "#string" do
    it "returns the original text for the scanner" do
      input = "It is a truth"
      word_scanner = RE2::Regexp.new('(\w+)').scan(input)

      expect(word_scanner.string).to equal(input)
    end
  end

  # Scanner#scan: every call is expected to return the captures of the next
  # match as an array, and nil once no further match can be made.
  describe "#scan" do
    it "returns the next array of matches", :aggregate_failures do
      words = RE2::Regexp.new('(\w+)').scan("It is a truth universally acknowledged")

      # One scan call per word, in input order.
      %w[It is a truth universally acknowledged].each do |word|
        expect(words.scan).to eq([word])
      end
      expect(words.scan).to be_nil
    end

    it "supports scanning inputs with null bytes", :aggregate_failures do
      pairs = RE2::Regexp.new("(\\w\0\\w)").scan("a\0b c\0d e\0f")

      # Both the pattern and the input contain NUL bytes.
      ["a\0b", "c\0d", "e\0f"].each do |pair|
        expect(pairs.scan).to eq([pair])
      end
      expect(pairs.scan).to be_nil
    end

    it "returns UTF-8 matches if the pattern is UTF-8" do
      captures = RE2::Regexp.new('(\w+)').scan("It").scan

      expect(captures.first.encoding).to eq(Encoding::UTF_8)
    end

    it "returns ISO-8859-1 matches if the pattern is not UTF-8" do
      captures = RE2::Regexp.new('(\w+)', utf8: false).scan("It").scan

      expect(captures.first.encoding).to eq(Encoding::ISO_8859_1)
    end

    it "returns multiple capturing groups at a time", :aggregate_failures do
      word_pairs = RE2::Regexp.new('(\w+) (\w+)').scan("It is a truth universally acknowledged")

      # Two groups in the pattern, so each scan yields a two-element array.
      [%w[It is], %w[a truth], %w[universally acknowledged]].each do |pair|
        expect(word_pairs.scan).to eq(pair)
      end
      expect(word_pairs.scan).to be_nil
    end

    it "returns an empty array if there are no capturing groups" do
      groupless = RE2::Regexp.new('\w+').scan("Foo bar")

      expect(groupless.scan).to eq([])
    end

    it "returns nil if there is no match" do
      digits = RE2::Regexp.new('\d+').scan("Foo bar")

      expect(digits.scan).to be_nil
    end

    it "returns nil if the regexp is invalid" do
      # log_errors: false is passed alongside the deliberately invalid pattern.
      broken = RE2::Regexp.new('???', log_errors: false).scan("Foo bar")

      expect(broken.scan).to be_nil
    end

    it "returns an empty array if the input is empty", :aggregate_failures do
      empty_scan = RE2::Regexp.new("").scan("")

      expect(empty_scan.scan).to eq([])
      expect(empty_scan.scan).to be_nil
    end

    it "returns an array of nil with an empty input and capture", :aggregate_failures do
      empty_scan = RE2::Regexp.new("()").scan("")

      expect(empty_scan.scan).to eq([nil])
      expect(empty_scan.scan).to be_nil
    end

    it "returns an empty array for every match if the pattern is empty", :aggregate_failures do
      empty_pattern = RE2::Regexp.new("").scan("Foo")

      # The three-character input is expected to produce four empty matches.
      4.times { expect(empty_pattern.scan).to eq([]) }
      expect(empty_pattern.scan).to be_nil
    end

    it "returns an array of nil if the pattern is an empty capturing group", :aggregate_failures do
      empty_group = RE2::Regexp.new("()").scan("Foo")

      # Same four matches as the empty pattern, each with one nil capture.
      4.times { expect(empty_group.scan).to eq([nil]) }
      expect(empty_group.scan).to be_nil
    end

    it "returns array of nils with multiple empty capturing groups", :aggregate_failures do
      empty_groups = RE2::Regexp.new("()()()").scan("Foo")

      # Three groups, so every one of the four matches carries three nils.
      4.times { expect(empty_groups.scan).to eq([nil, nil, nil]) }
      expect(empty_groups.scan).to be_nil
    end

    it "supports empty groups with multibyte characters", :aggregate_failures do
      euro = RE2::Regexp.new("()€").scan("€")

      expect(euro.scan).to eq([nil])
      expect(euro.scan).to be_nil
    end

    it "raises a Type Error if given input that can't be coerced to a String" do
      regexp = RE2::Regexp.new('(\w+)')

      expect { regexp.scan(0) }.to raise_error(TypeError)
    end

    it "accepts input that can be coerced to a String", :aggregate_failures do
      # StringLike is defined outside this file.
      words = RE2::Regexp.new('(\w+)').scan(StringLike.new("Hello world"))

      %w[Hello world].each do |word|
        expect(words.scan).to eq([word])
      end
      expect(words.scan).to be_nil
    end
  end

  # A scanner is expected to be an Enumerable.
  it "is enumerable" do
    digits = RE2::Regexp.new('(\d)').scan("There are 1 some 2 numbers 3")

    expect(digits).to be_a(Enumerable)
  end

  # Scanner#each: yields the captures of each match in turn when given a
  # block, and returns an Enumerator when called without one.
  describe "#each" do
    it "yields each match" do
      digits = RE2::Regexp.new('(\d)').scan("There are 1 some 2 numbers 3")

      expect { |probe| digits.each(&probe) }.to yield_successive_args(["1"], ["2"], ["3"])
    end

    it "returns an enumerator when not given a block" do
      digits = RE2::Regexp.new('(\d)').scan("There are 1 some 2 numbers 3")

      expect(digits.each).to be_a(Enumerator)
    end
  end

  # Scanner#rewind: after rewinding, scanning is expected to start again from
  # the first match and eof? is expected to be false again.
  describe "#rewind" do
    it "resets any consumption", :aggregate_failures do
      digits = RE2::Regexp.new('(\d)').scan("There are 1 some 2 numbers 3")

      # Each `to_enum.first` consumes one match, so two calls advance twice.
      expect(digits.to_enum.first).to eq(["1"])
      expect(digits.to_enum.first).to eq(["2"])

      digits.rewind

      expect(digits.to_enum.first).to eq(["1"])
    end

    it "supports inputs with null bytes", :aggregate_failures do
      pairs = RE2::Regexp.new("(\\w\0\\w)").scan("a\0b c\0d")

      expect(pairs.to_enum.first).to eq(["a\0b"])
      expect(pairs.to_enum.first).to eq(["c\0d"])

      pairs.rewind

      expect(pairs.to_enum.first).to eq(["a\0b"])
    end

    it "resets the eof? check", :aggregate_failures do
      digit = RE2::Regexp.new('(\d)').scan("1")

      # Consume the only match so the scanner reports eof before rewinding.
      digit.scan
      expect(digit).to be_eof

      digit.rewind
      expect(digit).not_to be_eof
    end
  end

  # Scanner#eof?: expected to be true only after a successful scan has
  # consumed the input; an unscanned or unmatched input is expected to report
  # false, even when that input is empty.
  describe "#eof?" do
    it "returns false if the input has not been consumed" do
      fresh = RE2::Regexp.new('(\d)').scan("1 2 3")

      expect(fresh).not_to be_eof
    end

    it "returns true if the input has been consumed" do
      consumed = RE2::Regexp.new('(\d)').scan("1")

      consumed.scan

      expect(consumed).to be_eof
    end

    it "returns false if no match is made" do
      unmatched = RE2::Regexp.new('(\d)').scan("a")

      # The scan is attempted, but the pattern does not match the input.
      unmatched.scan

      expect(unmatched).not_to be_eof
    end

    it "returns false with an empty input that has not been scanned" do
      fresh = RE2::Regexp.new("").scan("")

      expect(fresh).not_to be_eof
    end

    it "returns false with an empty input that has not been matched" do
      unmatched = RE2::Regexp.new('(\d)').scan("")

      unmatched.scan

      expect(unmatched).not_to be_eof
    end

    it "returns true with an empty input that has been matched" do
      consumed = RE2::Regexp.new("").scan("")

      consumed.scan

      expect(consumed).to be_eof
    end
  end
end