File: match_length.rb

package info (click to toggle)
ruby-regexp-parser 2.11.3-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 1,092 kB
  • sloc: ruby: 6,891; makefile: 6; sh: 3
file content (178 lines) | stat: -rw-r--r-- 4,250 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
# frozen_string_literal: true

# Describes which string lengths an expression can match.
#
# The lengths are derived from a per-repetition base length
# (+base_min+..+base_max+) multiplied by the repetition bounds that are read
# from the expression's +repetitions+ (its +min+ and +max+).
#
# Enumerating an instance yields every length between #min and #max for which
# #include? is true.
class Regexp::MatchLength
  include Enumerable

  # Returns the result of +match_length+ for +obj+.
  #
  # +obj+ is used directly when it is a Regexp::Expression::Base; anything
  # else is handed to Regexp::Parser.parse first.
  def self.of(obj)
    exp = obj.is_a?(Regexp::Expression::Base) ? obj : Regexp::Parser.parse(obj)
    exp.match_length
  end

  # exp  - the expression; its class and +repetitions.min+ / +repetitions.max+
  #        are recorded.
  # opts - either +:base+ (a fixed length per repetition), or all three of
  #        +:base_min+, +:base_max+ and +:reify+ (a callable returning the
  #        source of a pattern for one repetition). Hash#fetch raises
  #        KeyError if +:base+ is absent and one of the three is missing.
  def initialize(exp, opts = {})
    self.exp_class = exp.class
    self.min_rep = exp.repetitions.min
    self.max_rep = exp.repetitions.max
    if (base = opts[:base])
      self.base_min = base
      self.base_max = base
      self.reify = ->{ '.' * base }
    else
      self.base_min = opts.fetch(:base_min)
      self.base_max = opts.fetch(:base_max)
      self.reify = opts.fetch(:reify)
    end
  end

  # Yields matchable lengths in ascending order, stopping after
  # +opts[:limit]+ of them (default: 1000). Returns an Enumerator when
  # called without a block.
  def each(opts = {})
    return enum_for(__method__, opts) unless block_given?
    limit = opts[:limit] || 1000
    # The limit is only checked after a value has been yielded, so a
    # non-positive limit must be handled up front to yield nothing.
    return if limit <= 0
    yielded = 0
    (min..max).each do |num|
      next unless include?(num)
      yield(num)
      break if (yielded += 1) >= limit
    end
  end

  # Like #each, but without any limit on the number of yielded lengths.
  def endless_each
    return enum_for(__method__) unless block_given?
    (min..max).each { |num| yield(num) if include?(num) }
  end

  # True if a string consisting of +length+ characters matches the anchored
  # pattern built from #to_re.
  def include?(length)
    test_regexp.match?('X' * length)
  end

  # True if the minimum and maximum length are equal.
  def fixed?
    min == max
  end

  # Smallest matchable length: minimum repetitions times minimum base length.
  def min
    min_rep * base_min
  end

  # Largest matchable length: maximum repetitions times maximum base length.
  # May be Float::INFINITY.
  def max
    # 0 * Float::INFINITY is NaN, which is neither a usable range bound nor
    # equal to itself. If either factor is zero, nothing can be matched beyond
    # length zero, regardless of the other factor being infinite.
    return 0 if max_rep == 0 || base_max == 0
    max_rep * base_max
  end

  # Returns [min, max].
  def minmax
    [min, max]
  end

  def inspect
    type = exp_class.name.sub('Regexp::Expression::', '')
    "#<#{self.class}<#{type}> min=#{min} max=#{max}>"
  end

  # Returns a Regexp that wraps the reified single-repetition pattern in a
  # non-capturing group with an interval quantifier. The upper bound is left
  # open when the maximum repetition count is infinite.
  def to_re
    /(?:#{reify.call}){#{min_rep},#{max_rep unless max_rep == Float::INFINITY}}/
  end

  private

  attr_accessor :base_min, :base_max, :min_rep, :max_rep, :exp_class, :reify

  # Memoized, anchored version of #to_re that is used by #include?.
  if Regexp.method_defined?(:match?) # ruby >= 2.4
    def test_regexp
      @test_regexp ||= /^#{to_re}$/
    end
  else
    # Older rubies lack Regexp#match?, so it is added to this one instance.
    def test_regexp
      @test_regexp ||= /^#{to_re}$/.tap { |r| def r.match?(s); !!match(s) end }
    end
  end
end

module Regexp::Expression
  # Make the match length class reachable by its short name from within this
  # namespace; the +match_length+ definitions in this module refer to it as
  # plain +MatchLength+.
  MatchLength = Regexp::MatchLength

  # Expression classes for which one repetition has a base length of 1.
  [
    CharacterSet,
    CharacterSet::Intersection,
    CharacterSet::IntersectedSequence,
    CharacterSet::Range,
    CharacterType::Base,
    EscapeSequence::Base,
    PosixClass,
    UnicodeProperty::Base,
  ].each do |single_length_class|
    single_length_class.class_eval do
      def match_length
        MatchLength.new(self, base: 1)
      end
    end
  end

  class Literal
    # One repetition of a literal is as long as its +text+.
    def match_length
      text_length = text.length
      MatchLength.new(self, base: text_length)
    end
  end

  class Subexpression
    # The children are treated as a sequence: the base minimum / maximum is
    # the sum of the children's minimum / maximum lengths (0 without
    # children), and the reified pattern is the concatenation of the
    # children's patterns.
    def match_length
      min_total = map { |child| child.match_length.min }.reduce(0) { |sum, len| sum + len }
      max_total = map { |child| child.match_length.max }.reduce(0) { |sum, len| sum + len }
      sequence_source = ->{ map { |child| child.match_length.to_re }.join }

      MatchLength.new(self, base_min: min_total, base_max: max_total, reify: sequence_source)
    end

    # Returns the match length of a freshly constructed Root that is given
    # clones of this expression's children and a clone of its quantifier
    # (if it has one).
    def inner_match_length
      own_quantifier = quantifier
      stand_in = Regexp::Expression::Root.construct
      stand_in.expressions = expressions.map { |child| child.clone }
      stand_in.quantifier = own_quantifier ? own_quantifier.clone : own_quantifier
      stand_in.match_length
    end
  end

  # Expression classes whose children are treated as alternatives: the base
  # minimum is the smallest child minimum, the base maximum is the largest
  # child maximum, and the reified pattern joins the children's patterns
  # with '|'.
  [
    Alternation,
    Conditional::Expression,
  ].each do |branching_class|
    branching_class.class_eval do
      def match_length
        shortest = map { |branch| branch.match_length.min }.min
        longest = map { |branch| branch.match_length.max }.max
        branches_source = ->{ map { |branch| branch.match_length.to_re }.join('|') }

        MatchLength.new(self, base_min: shortest, base_max: longest, reify: branches_source)
      end
    end
  end

  # Expression classes for which one repetition has a base length of 0.
  [
    Anchor::Base,
    Assertion::Base,
    Conditional::Condition,
    FreeSpace,
    Keep::Mark,
  ].each do |zero_length_class|
    zero_length_class.class_eval do
      def match_length
        MatchLength.new(self, base: 0)
      end
    end
  end

  class Backreference::Base
    # Returns the match length of this reference.
    #
    # The base length of one repetition is taken from the match length of
    # +referenced_expression.unquantified_clone+ (defined elsewhere;
    # presumably a copy of the referenced expression without its quantifier).
    # The result is built for +self+, so that a quantifier on the reference
    # itself, as in `\1{2}`, is reflected in the repetition bounds instead
    # of being dropped.
    #
    # Raises ArgumentError if +referenced_expression+ is nil.
    def match_length
      if referenced_expression.nil?
        raise ArgumentError, 'Missing referenced_expression - not parsed?'
      end

      target_length = referenced_expression.unquantified_clone.match_length
      MatchLength.new(self,
                      base_min: target_length.min,
                      base_max: target_length.max,
                      reify: ->{ target_length.to_re })
    end
  end

  class EscapeSequence::CodepointList
    # One repetition is as long as the number of entries in +codepoints+.
    def match_length
      codepoint_total = codepoints.count
      MatchLength.new(self, base: codepoint_total)
    end
  end

  # Special case: an absence group is reported as matching anything from zero
  # characters upwards, irrespective of its content.
  # TODO: in theory, they *can* exclude match lengths with `.`: `(?~.{3})`
  class Group::Absence
    def match_length
      any_length_source = ->{ '.*' }
      MatchLength.new(self,
                      base_min: 0,
                      base_max: Float::INFINITY,
                      reify: any_length_source)
    end
  end
end