File: second_pass.rb

package info (click to toggle)
ruby-js-regex 3.8.0-1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, forky, sid, trixie
  • size: 164 kB
  • sloc: ruby: 1,002; makefile: 3
file content (148 lines) | stat: -rw-r--r-- 4,863 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
class JsRegex
  #
  # After conversion of a full Regexp::Expression tree, this
  # checks for Node instances that need further processing.
  #
  module SecondPass
    class << self
      def call(tree)
        substitute_root_level_keep_mark(tree)
        alternate_conditional_permutations(tree)
        tree
      end

      private

      def substitute_root_level_keep_mark(tree)
        keep_mark_index = nil
        tree.children.each.with_index do |child, i|
          break keep_mark_index = i if child.type == :keep_mark
        end
        return unless keep_mark_index

        pre = tree.children[0...keep_mark_index]
        post = tree.children[(keep_mark_index + 1)..-1]
        lookbehind = Node.new('(?<=', *pre, ')')
        tree.update(children: [lookbehind, *post])
      end

      def alternate_conditional_permutations(tree)
        permutations = conditional_tree_permutations(tree)
        return if permutations.empty?

        alternatives = permutations.map.with_index do |variant, i|
          Node.new((i.zero? ? '(?:' : '|(?:'), variant, ')')
        end
        tree.update(children: alternatives)
      end

      def conditional_tree_permutations(tree)
        conds = conditions(tree)
        return [] if conds.empty?

        caps_per_branch = captured_group_count(tree)

        condition_permutations(conds).map.with_index do |truthy_conds, i|
          tree_permutation = tree.clone
          # find referenced groups and conditionals and make one-sided
          crawl(tree_permutation) do |node|
            build_permutation(node, conds, truthy_conds, caps_per_branch, i)
          end
        end
      end

      def crawl(node, &block)
        return if node.instance_of?(String)
        yield(node)
        node.children.each { |child| crawl(child, &block) }
      end

      def conditions(tree)
        conditions = []
        crawl(tree) do |node|
          conditions << node.reference if node.type.equal?(:conditional)
        end
        conditions
      end

      def captured_group_count(tree)
        count = 0
        crawl(tree) { |node| count += 1 if node.type.equal?(:captured_group) }
        count
      end

      def condition_permutations(conditions)
        (0..(conditions.length)).inject([]) do |arr, n|
          arr + conditions.combination(n).to_a
        end
      end

      def build_permutation(node, conds, truthy_conds, caps_per_branch, i)
        truthy = truthy_conds.include?(node.reference)

        case node.type
        when :backref
          # We cannot use named groups or backrefs in the conditional expansion,
          # their repetition would cause a "Duplicate capture group name" error in JS.
          node.update(children: [
            node.children.first.sub(/k<.*>/, node.reference.to_s)
          ])
          # backref numbers need to be incremented for subsequent "branches"
          adapt_backref_to_permutation(node, caps_per_branch, i)
        when :captured_group
          # Remove name, c.f. :backref handling.
          node.update(children: [
            node.children.first.sub(/\?<.*>/, ''),
            *node.children[1..-1]
          ])
          # if the group is referenced by any condition, modulate its quantity
          if conds.include?(node.reference)
            adapt_referenced_group_to_permutation(node, truthy)
          end
        when :conditional
          adapt_conditional_to_permutation(node, truthy)
        end
      end

      def adapt_referenced_group_to_permutation(group_node, truthy)
        truthy ? min_quantify(group_node) : null_quantify(group_node)
      end

      def adapt_conditional_to_permutation(conditional_node, truthy)
        branches = conditional_node.children[1...-1]
        if branches.count == 1
          truthy || null_quantify(branches.first)
        else
          null_quantify(truthy ? branches.last : branches.first)
        end
        conditional_node.update(type: :plain)
      end

      def adapt_backref_to_permutation(backref_node, caps_per_branch, i)
        new_num = backref_node.reference + caps_per_branch * i
        backref_node.update(children: ["\\#{new_num}"])
      end

      def min_quantify(node)
        return if guarantees_at_least_one_match?(qtf = node.quantifier)

        if qtf.max.equal?(1) # any zero_or_one quantifier (?, ??, ?+)
          node.update(quantifier: nil)
        else
          min_quantifier = qtf.dup
          min_quantifier.text = "{1,#{qtf.max}}#{'?' if qtf.reluctant?}"
          node.update(quantifier: min_quantifier)
        end
      end

      def guarantees_at_least_one_match?(quantifier)
        quantifier.nil? || quantifier.min > 0
      end

      def null_quantify(node)
        null_quantifier = Regexp::Expression::Quantifier.construct(text: '{0}')
        node.update(quantifier: null_quantifier)
      end
    end
  end
end