File: escapes_spec.rb

package info (click to toggle)
ruby-regexp-parser 2.11.3-1
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 1,092 kB
  • sloc: ruby: 6,891; makefile: 6; sh: 3
file content (101 lines) | stat: -rw-r--r-- 7,463 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
# frozen_string_literal: true

require 'spec_helper'

# Checks the tokens expected when scanning patterns that contain escape
# sequences.
#
# Every example hands a pattern (a Regexp literal, a String, or a Regexp built
# by the `n` lambda defined further down) to the shared 'scan' examples,
# followed by a hash that maps a token index to a five-element array of
# expected values.
# NOTE(review): the 'scan' shared examples are defined outside this file; the
# array looks like [type, token, text, start offset, end offset] - confirm
# against their definition.
RSpec.describe('Escape scanning') do
  include_examples 'scan', /c\at/,            1 => [:escape,  :bell,             '\a',             1,  3]

  # \b is expected as a word-boundary anchor here, not as an escape,
  # because it occurs outside a character set
  include_examples 'scan', /c\bt/,            1 => [:anchor,  :word_boundary,    '\b',             1,  3]

  include_examples 'scan', /c\ft/,            1 => [:escape,  :form_feed,        '\f',             1,  3]
  include_examples 'scan', /c\nt/,            1 => [:escape,  :newline,          '\n',             1,  3]
  include_examples 'scan', /c\tt/,            1 => [:escape,  :tab,              '\t',             1,  3]
  include_examples 'scan', /c\vt/,            1 => [:escape,  :vertical_tab,     '\v',             1,  3]

  # ineffectual literal escapes
  # these cause "Unknown escape" warnings in Ruby for ascii chars,
  # and simply drop the backslash for non-ascii chars (/\ü/.inspect == '/ü/').
  # In terms of matching, Ruby treats them both like non-escaped literals.
  include_examples 'scan', 'c\qt',            1 => [:escape,  :literal,          '\q',             1,  3]
  include_examples 'scan', 'a\üc',            1 => [:escape, :literal,           '\ü',             1,  3]
  include_examples 'scan', 'a\😋c',           1 => [:escape, :literal,            '\😋',            1,  3]

  # these incomplete ref/call sequences are treated as literal escapes by Ruby
  include_examples 'scan', 'c\gt',            1 => [:escape,  :literal,          '\g',             1,  3]
  include_examples 'scan', 'c\kt',            1 => [:escape,  :literal,          '\k',             1,  3]

  # octal escapes: the expected token text ends after three digits,
  # even when a further digit follows (second example)
  include_examples 'scan', 'a\012c',          1 => [:escape,  :octal,            '\012',           1,  5]
  include_examples 'scan', 'a\0124',          1 => [:escape,  :octal,            '\012',           1,  5]
  include_examples 'scan', '\712+7',          0 => [:escape,  :octal,            '\712',           0,  4]

  # Multi-digit escaped numbers that occur before sufficient capturing groups
  # have been opened are treated as octal or literal.
  # "\10"[/\10()()()()()()()()()()/] # => "\b" # treated as octal
  # "\70"[/\70()()()()()()()()()()/] # => "8"  # treated as octal
  # "90"[/\90()()()()()()()()()()/]  # => "90" # treated as literal
  # For cases treated as backrefs, see ./refcalls_spec.rb
  #
  # The patterns below are double-quoted so that the groups can be appended
  # by interpolation; "\\10" therefore yields the two characters \1 followed by 0.
  include_examples 'scan', "\\10#{'()' * 10}",0 => [:escape,  :octal,    '\10',            0,  3]
  include_examples 'scan', "\\90#{'()' * 90}",0 => [:escape,  :literal,  '\9',             0,  2],
                                              1 => [:literal, :literal,  '0',              2,  3]

  # special case: "out-of-bound octal escapes" (digits > 7) are not treated as backrefs
  # (the same pattern is checked twice, once per token index)
  include_examples 'scan', '\80',             0 => [:escape,  :literal,          '\8',             0,  2]
  include_examples 'scan', '\80',             1 => [:literal, :literal,          '0',              2,  3]

  # hex escapes: the expected token text ends after two hex digits ('\x0640'
  # yields '\x06'). Three consecutive hex escapes in a String pattern are
  # expected as a single :utf8_hex token, whereas the same sequence in a
  # Regexp literal with the n flag is expected as three separate :hex tokens.
  include_examples 'scan', 'a\xA',            1 => [:escape,  :hex,              '\xA',            1,  4]
  include_examples 'scan', 'a\x24c',          1 => [:escape,  :hex,              '\x24',           1,  5]
  include_examples 'scan', 'a\x0640c',        1 => [:escape,  :hex,              '\x06',           1,  5]
  include_examples 'scan', 'a\xE2\x82\xAC',   1 => [:escape,  :utf8_hex,         '\xE2\x82\xAC',   1,  13]
  include_examples 'scan', /a\xE2\x82\xAC/n,  1 => [:escape,  :hex,              '\xE2',           1,  5]
  include_examples 'scan', /a\xE2\x82\xAC/n,  2 => [:escape,  :hex,              '\x82',           5,  9]
  include_examples 'scan', /a\xE2\x82\xAC/n,  3 => [:escape,  :hex,              '\xAC',           9,  13]

  # unicode escapes: \uXXXX is expected as :codepoint, the braced form
  # (one or more codepoints) as :codepoint_list
  include_examples 'scan', 'a\u0640c',        1 => [:escape,  :codepoint,        '\u0640',         1,  7]
  include_examples 'scan', 'a\u{640 0641}c',  1 => [:escape,  :codepoint_list,   '\u{640 0641}',   1,  13]
  include_examples 'scan', 'a\u{10FFFF}c',    1 => [:escape,  :codepoint_list,   '\u{10FFFF}',     1,  11]

  # an escaped backslash directly followed by a character that could otherwise
  # start another escape (x, 0, K): only the two backslashes are expected as
  # token 1. In these single-quoted strings '\\\x' is the three characters \\x.
  include_examples 'scan', 'ab\\\xcd',        1 => [:escape,  :backslash,        '\\\\',           2,  4]
  include_examples 'scan', 'ab\\\0cd',        1 => [:escape,  :backslash,        '\\\\',           2,  4]
  include_examples 'scan', 'ab\\\Kcd',        1 => [:escape,  :backslash,        '\\\\',           2,  4]

  # escaped metacharacters are expected as :escape tokens named after the
  # metacharacter (:bol, :eol, :set_open)
  include_examples 'scan', 'ab\^cd',          1 => [:escape,  :bol,              '\^',             2,  4]
  include_examples 'scan', 'ab\$cd',          1 => [:escape,  :eol,              '\$',             2,  4]
  include_examples 'scan', 'ab\[cd',          1 => [:escape,  :set_open,         '\[',             2,  4]

  # escaped whitespace in x-mode
  include_examples 'scan', /a\ b/x,           0 => [:literal,  :literal,         'a',              0,  1],
                                              1 => [:escape,   :literal,         '\ ',             1,  3],
                                              2 => [:literal,  :literal,         'b',              3,  4]
  # newline literals can't be escaped in x-mode, cf. https://bugs.ruby-lang.org/issues/19639
  # (the Regexp literal below deliberately spans two lines; its second line
  # must stay at column 0 so that no extra whitespace enters the pattern)
  include_examples 'scan', /a\
b/x,                                          0 => [:literal,  :literal,         'ab',             0,  2]

  # Meta/control escapes
  #
  # After the following fix in Ruby 3.1, a Regexp#source containing meta/control
  # escapes can only be set with the Regexp::new constructor.
  # In Regexp literals, these escapes are now pre-processed to hex escapes.
  #
  # https://github.com/ruby/ruby/commit/11ae581a4a7f5d5f5ec6378872eab8f25381b1b9
  #
  # `n` builds a Regexp from a copy of the given source string re-tagged as
  # ascii-8bit. The copy (dup) is needed because force_encoding modifies its
  # receiver and string literals in this file are frozen.
  n = ->(regexp_body){ Regexp.new(regexp_body.dup.force_encoding('ascii-8bit')) }

  # control escapes in \c and \C- form, including ones whose target is itself
  # an escape (\n, an escaped backslash, or a meta sequence)
  include_examples 'scan', 'a\cBc',           1 => [:escape,  :control,          '\cB',            1,  4]
  include_examples 'scan', 'a\c^c',           1 => [:escape,  :control,          '\c^',            1,  4]
  include_examples 'scan', 'a\c\n',           1 => [:escape,  :control,          '\c\n',           1,  5]
  include_examples 'scan', 'a\c\\\\b',        1 => [:escape,  :control,          '\c\\\\',         1,  5]
  include_examples 'scan', 'a\C-bc',          1 => [:escape,  :control,          '\C-b',           1,  5]
  include_examples 'scan', 'a\C-^b',          1 => [:escape,  :control,          '\C-^',           1,  5]
  include_examples 'scan', 'a\C-\nb',         1 => [:escape,  :control,          '\C-\n',          1,  6]
  include_examples 'scan', 'a\C-\\\\b',       1 => [:escape,  :control,          '\C-\\\\',        1,  6]
  include_examples 'scan', n.('a\c\M-Bc'),    1 => [:escape,  :control,          '\c\M-B',         1,  7]
  include_examples 'scan', n.('a\C-\M-Bc'),   1 => [:escape,  :control,          '\C-\M-B',        1,  8]

  # meta escapes, alone and combined with a nested control escape; a sequence
  # that starts with \M- is expected as :meta_sequence
  include_examples 'scan', n.('a\M-Bc'),      1 => [:escape,  :meta_sequence,    '\M-B',           1,  5]
  include_examples 'scan', n.('a\M-\cBc'),    1 => [:escape,  :meta_sequence,    '\M-\cB',         1,  7]
  include_examples 'scan', n.('a\M-\c^'),     1 => [:escape,  :meta_sequence,    '\M-\c^',         1,  7]
  include_examples 'scan', n.('a\M-\c\n'),    1 => [:escape,  :meta_sequence,    '\M-\c\n',        1,  8]
  include_examples 'scan', n.('a\M-\c\\\\'),  1 => [:escape,  :meta_sequence,    '\M-\c\\\\',      1,  8]
  include_examples 'scan', n.('a\M-\C-Bc'),   1 => [:escape,  :meta_sequence,    '\M-\C-B',        1,  8]
  include_examples 'scan', n.('a\M-\C-\\\\'), 1 => [:escape,  :meta_sequence,    '\M-\C-\\\\',     1,  9]
end