# encoding: utf-8
RSpec.describe RE2::Scanner do
describe "#regexp" do
  it "returns the original pattern for the scanner" do
    pattern = RE2::Regexp.new('(\w+)')

    # `equal` checks object identity: the scanner is expected to return the
    # very same RE2::Regexp instance it was built from.
    expect(pattern.scan("It is a truth").regexp).to equal(pattern)
  end
end
describe "#string" do
  it "returns the original text for the scanner" do
    input = "It is a truth"
    scanner = RE2::Regexp.new('(\w+)').scan(input)

    # `equal` checks object identity: the scanner is expected to expose the
    # same String object that was passed in.
    expect(scanner.string).to equal(input)
  end
end
describe "#scan" do
  it "returns the next array of matches", :aggregate_failures do
    scanner = RE2::Regexp.new('(\w+)').scan("It is a truth universally acknowledged")

    # Every call is expected to yield the next word as a one-element array.
    ["It", "is", "a", "truth", "universally", "acknowledged"].each do |word|
      expect(scanner.scan).to eq([word])
    end
    expect(scanner.scan).to be_nil
  end

  it "supports scanning inputs with null bytes", :aggregate_failures do
    scanner = RE2::Regexp.new("(\\w\0\\w)").scan("a\0b c\0d e\0f")

    # Both the pattern and the input embed NUL bytes.
    ["a\0b", "c\0d", "e\0f"].each do |chunk|
      expect(scanner.scan).to eq([chunk])
    end
    expect(scanner.scan).to be_nil
  end

  it "returns UTF-8 matches if the pattern is UTF-8" do
    first_capture = RE2::Regexp.new('(\w+)').scan("It").scan.first

    expect(first_capture.encoding).to eq(Encoding::UTF_8)
  end

  it "returns ISO-8859-1 matches if the pattern is not UTF-8" do
    first_capture = RE2::Regexp.new('(\w+)', utf8: false).scan("It").scan.first

    expect(first_capture.encoding).to eq(Encoding::ISO_8859_1)
  end

  it "returns multiple capturing groups at a time", :aggregate_failures do
    scanner = RE2::Regexp.new('(\w+) (\w+)').scan("It is a truth universally acknowledged")

    # One array per match, holding one entry per capturing group.
    [["It", "is"], ["a", "truth"], ["universally", "acknowledged"]].each do |pair|
      expect(scanner.scan).to eq(pair)
    end
    expect(scanner.scan).to be_nil
  end

  it "returns an empty array if there are no capturing groups" do
    scanner = RE2::Regexp.new('\w+').scan("Foo bar")

    expect(scanner.scan).to eq([])
  end

  it "returns nil if there is no match" do
    scanner = RE2::Regexp.new('\d+').scan("Foo bar")

    expect(scanner.scan).to be_nil
  end

  it "returns nil if the regexp is invalid" do
    # The pattern is deliberately malformed; log_errors: false is passed with it.
    invalid = RE2::Regexp.new('???', log_errors: false)

    expect(invalid.scan("Foo bar").scan).to be_nil
  end

  it "returns an empty array if the input is empty", :aggregate_failures do
    scanner = RE2::Regexp.new("").scan("")

    expect(scanner.scan).to eq([])
    expect(scanner.scan).to be_nil
  end

  it "returns an array of nil with an empty input and capture", :aggregate_failures do
    scanner = RE2::Regexp.new("()").scan("")

    expect(scanner.scan).to eq([nil])
    expect(scanner.scan).to be_nil
  end

  it "returns an empty array for every match if the pattern is empty", :aggregate_failures do
    scanner = RE2::Regexp.new("").scan("Foo")

    # Four empty matches are expected for the three-character input.
    4.times { expect(scanner.scan).to eq([]) }
    expect(scanner.scan).to be_nil
  end

  it "returns an array of nil if the pattern is an empty capturing group", :aggregate_failures do
    scanner = RE2::Regexp.new("()").scan("Foo")

    # Four matches are expected, each reporting the empty group as nil.
    4.times { expect(scanner.scan).to eq([nil]) }
    expect(scanner.scan).to be_nil
  end

  it "returns array of nils with multiple empty capturing groups", :aggregate_failures do
    scanner = RE2::Regexp.new("()()()").scan("Foo")

    # Four matches are expected, each with one nil per empty group.
    4.times { expect(scanner.scan).to eq([nil, nil, nil]) }
    expect(scanner.scan).to be_nil
  end

  it "supports empty groups with multibyte characters", :aggregate_failures do
    scanner = RE2::Regexp.new("()€").scan("€")

    expect(scanner.scan).to eq([nil])
    expect(scanner.scan).to be_nil
  end

  it "raises a Type Error if given input that can't be coerced to a String" do
    regexp = RE2::Regexp.new('(\w+)')

    expect { regexp.scan(0) }.to raise_error(TypeError)
  end

  it "accepts input that can be coerced to a String", :aggregate_failures do
    # StringLike is defined outside this file (presumably a spec helper).
    scanner = RE2::Regexp.new('(\w+)').scan(StringLike.new("Hello world"))

    ["Hello", "world"].each do |word|
      expect(scanner.scan).to eq([word])
    end
    expect(scanner.scan).to be_nil
  end
end
it "is enumerable" do
  scanner = RE2::Regexp.new('(\d)').scan("There are 1 some 2 numbers 3")

  # The scanner itself is expected to mix in Enumerable.
  expect(scanner).to be_a_kind_of(Enumerable)
end
describe "#each" do
  # Both examples iterate over the same digits-in-text scanner.
  let(:scanner) { RE2::Regexp.new('(\d)').scan("There are 1 some 2 numbers 3") }

  it "yields each match" do
    expect { |probe| scanner.each(&probe) }.to yield_successive_args(["1"], ["2"], ["3"])
  end

  it "returns an enumerator when not given a block" do
    expect(scanner.each).to be_a(Enumerator)
  end
end
describe "#rewind" do
  it "resets any consumption", :aggregate_failures do
    scanner = RE2::Regexp.new('(\d)').scan("There are 1 some 2 numbers 3")

    # Each new enumerator is expected to pick up where the scanner left off.
    expect(scanner.to_enum.first).to eq(["1"])
    expect(scanner.to_enum.first).to eq(["2"])

    scanner.rewind

    expect(scanner.to_enum.first).to eq(["1"])
  end

  it "supports inputs with null bytes", :aggregate_failures do
    scanner = RE2::Regexp.new("(\\w\0\\w)").scan("a\0b c\0d")

    expect(scanner.to_enum.first).to eq(["a\0b"])
    expect(scanner.to_enum.first).to eq(["c\0d"])

    scanner.rewind

    expect(scanner.to_enum.first).to eq(["a\0b"])
  end

  it "resets the eof? check", :aggregate_failures do
    scanner = RE2::Regexp.new('(\d)').scan("1")

    # Consume the only match so the scanner reports eof before rewinding.
    scanner.scan
    expect(scanner).to be_eof

    scanner.rewind
    expect(scanner).not_to be_eof
  end
end
describe "#eof?" do
  it "returns false if the input has not been consumed" do
    scanner = RE2::Regexp.new('(\d)').scan("1 2 3")

    expect(scanner).not_to be_eof
  end

  it "returns true if the input has been consumed" do
    scanner = RE2::Regexp.new('(\d)').scan("1")

    scanner.scan

    expect(scanner).to be_eof
  end

  it "returns false if no match is made" do
    scanner = RE2::Regexp.new('(\d)').scan("a")

    # The scan attempt finds no digit in "a"; eof? is expected to stay false.
    scanner.scan

    expect(scanner).not_to be_eof
  end

  it "returns false with an empty input that has not been scanned" do
    scanner = RE2::Regexp.new("").scan("")

    expect(scanner).not_to be_eof
  end

  it "returns false with an empty input that has not been matched" do
    scanner = RE2::Regexp.new('(\d)').scan("")

    scanner.scan

    expect(scanner).not_to be_eof
  end

  it "returns true with an empty input that has been matched" do
    scanner = RE2::Regexp.new("").scan("")

    # The empty pattern matches the empty input, after which eof? is expected to be true.
    scanner.scan

    expect(scanner).to be_eof
  end
end
end
|