File: sets_spec.rb

package info (click to toggle)
ruby-regexp-parser 2.6.1-1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, forky, sid, trixie
  • size: 968 kB
  • sloc: ruby: 6,396; sh: 12; makefile: 6
file content (151 lines) | stat: -rw-r--r-- 11,487 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
require 'spec_helper'

# Table-driven expectations for how character sets (`[...]`) are scanned.
#
# Every row hands a pattern (a Regexp literal or a String) plus a Hash to the
# shared 'scan' examples, which are defined outside this file. Judging by the
# rows below and by `RS.scan('[a]')[1][2]` in the final example, each Hash key
# is the position of a token in the scanner output and each value is
# `[type, token, text, start_offset, end_offset]` -- TODO confirm against the
# shared example definition.
RSpec.describe('Set scanning') do
  # Set delimiters and the negation marker.
  include_examples 'scan', /[a]/,                   0 => [:set,    :open,            '[',          0, 1]
  include_examples 'scan', /[b]/,                   2 => [:set,    :close,           ']',          2, 3]
  include_examples 'scan', /[^n]/,                  1 => [:set,    :negate,          '^',          1, 2]

  # Plain members, with and without a preceding negation.
  include_examples 'scan', /[c]/,                   1 => [:literal, :literal,        'c',          1, 2]
  include_examples 'scan', /[^d]/,                  2 => [:literal, :literal,        'd',          2, 3]

  # Inside a set, `\b` is expected to scan as the backspace escape, both on
  # its own and between other members (Ruby treats `\b` as a word boundary
  # only outside of brackets).
  include_examples 'scan', /[\b]/,                  1 => [:escape, :backspace,       '\b',         1, 3]
  include_examples 'scan', /[A\bX]/,                2 => [:escape, :backspace,       '\b',         2, 4]

  # Control-character escapes.
  include_examples 'scan', /[\a]/,                  1 => [:escape, :bell,            '\a',         1, 3]
  include_examples 'scan', /[\e]/,                  1 => [:escape, :escape,          '\e',         1, 3]
  include_examples 'scan', /[\f]/,                  1 => [:escape, :form_feed,       '\f',         1, 3]
  include_examples 'scan', /[\n]/,                  1 => [:escape, :newline,         '\n',         1, 3]
  include_examples 'scan', /[\r]/,                  1 => [:escape, :carriage,        '\r',         1, 3]
  include_examples 'scan', /[\t]/,                  1 => [:escape, :tab,             '\t',         1, 3]
  include_examples 'scan', /[\v]/,                  1 => [:escape, :vertical_tab,    '\v',         1, 3]

  # Unescaped characters that are special elsewhere in a pattern (dot,
  # quantifiers, braces, angle brackets) are expected to be plain literals here.
  include_examples 'scan', /[.]/,                   1 => [:literal, :literal,        '.',          1, 2]
  include_examples 'scan', /[?]/,                   1 => [:literal, :literal,        '?',          1, 2]
  include_examples 'scan', /[*]/,                   1 => [:literal, :literal,        '*',          1, 2]
  include_examples 'scan', /[+]/,                   1 => [:literal, :literal,        '+',          1, 2]
  include_examples 'scan', /[{]/,                   1 => [:literal, :literal,        '{',          1, 2]
  include_examples 'scan', /[}]/,                   1 => [:literal, :literal,        '}',          1, 2]
  include_examples 'scan', /[<]/,                   1 => [:literal, :literal,        '<',          1, 2]
  include_examples 'scan', /[>]/,                   1 => [:literal, :literal,        '>',          1, 2]

  # Escape sequences, given as single-quoted Strings so that the backslashes
  # are part of the pattern text handed to the shared example (only `\\`
  # collapses to one backslash in a single-quoted literal, so '[\\\\]' is the
  # four-character pattern `[\\]`).
  # Escaped characters without a dedicated token, including `\A`, `\z`, `\g`,
  # `\K`, `\R`, `\X` and `\B`, are expected as [:escape, :literal] inside a set.
  include_examples 'scan', '[\\\\]',                1 => [:escape, :backslash,       '\\\\',       1, 3]
  include_examples 'scan', '[\u0040]',              1 => [:escape, :codepoint,       '\u0040',     1, 7]
  include_examples 'scan', '[\u{40}]',              1 => [:escape, :codepoint_list,  '\u{40}',     1, 7]
  include_examples 'scan', '[\c2]',                 1 => [:escape, :control,         '\c2',        1, 4]
  include_examples 'scan', '[\C-C]',                1 => [:escape, :control,         '\C-C',       1, 5]
  include_examples 'scan', '[\x20]',                1 => [:escape, :hex,             '\x20',       1, 5]
  include_examples 'scan', '[\M-Z]',                1 => [:escape, :meta_sequence,   '\M-Z',       1, 5]
  include_examples 'scan', '[\M-\C-X]',             1 => [:escape, :meta_sequence,   '\M-\C-X',    1, 8]
  include_examples 'scan', '[\\[]',                 1 => [:escape, :set_open,        '\[',         1, 3]
  include_examples 'scan', '[\\]]',                 1 => [:escape, :set_close,       '\]',         1, 3]
  include_examples 'scan', '[a\-]',                 2 => [:escape, :literal,         '\-',         2, 4]
  include_examples 'scan', '[\-c]',                 1 => [:escape, :literal,         '\-',         1, 3]
  include_examples 'scan', '[\.]',                  1 => [:escape, :literal,         '\.',         1, 3]
  include_examples 'scan', '[\?]',                  1 => [:escape, :literal,         '\?',         1, 3]
  include_examples 'scan', '[\*]',                  1 => [:escape, :literal,         '\*',         1, 3]
  include_examples 'scan', '[\+]',                  1 => [:escape, :literal,         '\+',         1, 3]
  include_examples 'scan', '[\|]',                  1 => [:escape, :literal,         '\|',         1, 3]
  include_examples 'scan', '[\{]',                  1 => [:escape, :literal,         '\{',         1, 3]
  include_examples 'scan', '[\}]',                  1 => [:escape, :literal,         '\}',         1, 3]
  include_examples 'scan', '[\(]',                  1 => [:escape, :literal,         '\(',         1, 3]
  include_examples 'scan', '[\)]',                  1 => [:escape, :literal,         '\)',         1, 3]
  include_examples 'scan', '[\!]',                  1 => [:escape, :literal,         '\!',         1, 3]
  include_examples 'scan', '[\#]',                  1 => [:escape, :literal,         '\#',         1, 3]
  include_examples 'scan', '[\A]',                  1 => [:escape, :literal,         '\A',         1, 3]
  include_examples 'scan', '[\z]',                  1 => [:escape, :literal,         '\z',         1, 3]
  include_examples 'scan', '[\g]',                  1 => [:escape, :literal,         '\g',         1, 3]
  include_examples 'scan', '[\K]',                  1 => [:escape, :literal,         '\K',         1, 3]
  include_examples 'scan', '[\R]',                  1 => [:escape, :literal,         '\R',         1, 3]
  include_examples 'scan', '[\X]',                  1 => [:escape, :literal,         '\X',         1, 3]
  include_examples 'scan', '[\B]',                  1 => [:escape, :literal,         '\B',         1, 3]

  # Shorthand character types and their negated (upper-case) counterparts.
  include_examples 'scan', /[\d]/,                  1 => [:type,   :digit,           '\d',         1, 3]
  include_examples 'scan', /[\da-z]/,               1 => [:type,   :digit,           '\d',         1, 3]
  include_examples 'scan', /[\D]/,                  1 => [:type,   :nondigit,        '\D',         1, 3]

  include_examples 'scan', /[\h]/,                  1 => [:type,   :hex,             '\h',         1, 3]
  include_examples 'scan', /[\H]/,                  1 => [:type,   :nonhex,          '\H',         1, 3]

  include_examples 'scan', /[\s]/,                  1 => [:type,   :space,           '\s',         1, 3]
  include_examples 'scan', /[\S]/,                  1 => [:type,   :nonspace,        '\S',         1, 3]

  include_examples 'scan', /[\w]/,                  1 => [:type,   :word,            '\w',         1, 3]
  include_examples 'scan', /[\W]/,                  1 => [:type,   :nonword,         '\W',         1, 3]

  # Ranges: a hyphen between two members is expected as [:set, :range]; a
  # leading or trailing hyphen, and a `^` that is not in first position, are
  # expected as plain literals.
  include_examples 'scan', /[a-b]/,                 1 => [:literal, :literal,        'a',          1, 2]
  include_examples 'scan', /[a-c]/,                 2 => [:set,     :range,          '-',          2, 3]
  include_examples 'scan', /[a-d]/,                 3 => [:literal, :literal,        'd',          3, 4]
  include_examples 'scan', /[a-b-]/,                4 => [:literal, :literal,        '-',          4, 5]
  include_examples 'scan', /[-a]/,                  1 => [:literal, :literal,        '-',          1, 2]
  include_examples 'scan', /[a-c^]/,                4 => [:literal, :literal,        '^',          4, 5]
  include_examples 'scan', /[a-bd-f]/,              2 => [:set,     :range,          '-',          2, 3]
  include_examples 'scan', /[a-cd-f]/,              5 => [:set,     :range,          '-',          5, 6]
  # this is a buggy range, it matches only `c`, but not `a`, `b` or `-`
  include_examples 'scan', /[a-[c]]/,               2 => [:set,     :range,          '-',          2, 3]
  # these are not ranges, they match `a`, `c` and `-` (or non-`-` if negated)
  include_examples 'scan', /[[a]-[c]]/,             4 => [:literal, :literal,        '-',          4, 5]
  include_examples 'scan', /[[a]-c]/,               4 => [:literal, :literal,        '-',          4, 5]
  include_examples 'scan', /[^-c]/,                 2 => [:literal, :literal,        '-',          2, 3]

  # POSIX bracket expressions are expected as a single token each; the `^`
  # form is reported with the :nonposixclass type.
  include_examples 'scan', /[a[:digit:]c]/,         2 => [:posixclass,    :digit,    '[:digit:]',  2, 11]
  include_examples 'scan', /[[:digit:][:space:]]/,  2 => [:posixclass,    :space,    '[:space:]', 10, 19]
  include_examples 'scan', /[[:^digit:]]/,          1 => [:nonposixclass, :digit,    '[:^digit:]', 1, 11]

  # `&&` is expected as one intersection token, even when an operand is
  # missing on either side.
  include_examples 'scan', /[a-d&&g-h]/,            4 => [:set,    :intersection,    '&&',         4, 6]
  include_examples 'scan', /[a&&]/,                 2 => [:set,    :intersection,    '&&',         2, 4]
  include_examples 'scan', /[&&z]/,                 1 => [:set,    :intersection,    '&&',         1, 3]
  include_examples 'scan', /[&&]/,                  1 => [:set,    :intersection,    '&&',         1, 3]

  # Unicode properties. `\P{...}` and `\p{^...}` are each expected as
  # :nonproperty, while the doubly negated `\P{^...}` is expected as :property.
  include_examples 'scan', '[a\p{digit}c]',         2 => [:property,    :digit,      '\p{digit}',  2, 11]
  include_examples 'scan', '[a\P{digit}c]',         2 => [:nonproperty, :digit,      '\P{digit}',  2, 11]
  include_examples 'scan', '[a\p{^digit}c]',        2 => [:nonproperty, :digit,      '\p{^digit}', 2, 12]
  include_examples 'scan', '[a\P{^digit}c]',        2 => [:property,    :digit,      '\P{^digit}', 2, 12]

  # Upper-case and abbreviated property names are expected to map to the same
  # token symbols as their long lower-case forms (`ALPHA` => :alpha,
  # `P` => :punctuation).
  include_examples 'scan', '[a\p{ALPHA}c]',         2 => [:property,    :alpha,      '\p{ALPHA}',  2, 11]
  include_examples 'scan', '[a\p{P}c]',             2 => [:property,    :punctuation,'\p{P}',      2, 7]
  include_examples 'scan', '[a\p{P}\P{P}c]',        3 => [:nonproperty, :punctuation,'\P{P}',      7, 12]

  # A range whose endpoints are both hex escapes.
  include_examples 'scan', '[\x20-\x27]',
    1 => [:escape, :hex,             '\x20',       1, 5],
    2 => [:set,    :range,           '-',          5, 6],
    3 => [:escape, :hex,             '\x27',       6, 10]

  # A negated nested set containing a range, used as the right-hand side of
  # an intersection.
  include_examples 'scan', '[a-w&&[^c-g]z]',
    5 => [:set,    :open,            '[',          6, 7],
    6 => [:set,    :negate,          '^',          7, 8],
    8 => [:set,    :range,           '-',          9, 10],
    10=> [:set,    :close,           ']',          11, 12]

  # Collations/collating sequences and character equivalents are not enabled
  # in Ruby at the moment. If they ever are, enable them in the scanner,
  # add them to a new syntax version, and handle them in the parser. Until then,
  # expect them to be scanned as regular subsets containing literals.
  # include_examples 'scan', /[a[.a-b.]c]/,           2 => [:set,    :collation,       '[.a-b.]',    2,  9]
  # include_examples 'scan', /[a[=e=]c]/,             2 => [:set,    :equivalent,      '[=e=]',      2,  7]
  include_examples 'scan', '[a[.a-b.]c]',
    2 => [:set,     :open,           '[',          2,  3],
    3 => [:literal, :literal,        '.',          3,  4],
    4 => [:literal, :literal,        'a',          4,  5]
  include_examples 'scan', '[a[=e=]c]',
    2 => [:set,     :open,           '[',          2,  3],
    3 => [:literal, :literal,        '=',          3,  4],
    4 => [:literal, :literal,        'e',          4,  5]

  # multi-byte characters should not affect indices
  # (each of the three characters below advances the expected offsets by one)
  include_examples 'scan', /[れます]/,
    0 => [:set,     :open,           '[',          0, 1],
    1 => [:literal, :literal,        'れ',          1, 2],
    2 => [:literal, :literal,        'ま',          2, 3],
    3 => [:literal, :literal,        'す',          3, 4],
    4 => [:set,     :close,          ']',          4, 5]

  # The text of a literal token inside a set is expected to be UTF-8 encoded,
  # for an ASCII member as well as for a code point outside the Basic
  # Multilingual Plane. `RS` is defined outside this file (presumably a
  # shorthand for the scanner class -- confirm in spec_helper).
  specify('set literal encoding') do
    # Token 1 is the first member of the set; field 2 is its text.
    text = RS.scan('[a]')[1][2].to_s
    expect(text).to eq 'a'
    expect(text.encoding.to_s).to eq 'UTF-8'

    # Double-quoted, so the pattern contains the actual U+1F632 character.
    text = RS.scan("[\u{1F632}]")[1][2].to_s
    expect(text).to eq "\u{1F632}"
    expect(text.encoding.to_s).to eq 'UTF-8'
  end
end