1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178
|
# frozen_string_literal: true
# Describes which string lengths an expression can match.
#
# A MatchLength combines the expression's repetition range
# (min_rep..max_rep) with the length range of one repetition
# (base_min..base_max). It is Enumerable over the matchable lengths.
class Regexp::MatchLength
  include Enumerable

  # Returns the match length of a regexp or of an already-parsed expression.
  #
  # obj - a Regexp::Expression::Base, or anything else, which is then
  #       handed to Regexp::Parser.parse first.
  #
  # Returns the result of calling `match_length` on the expression.
  def self.of(obj)
    exp = obj.is_a?(Regexp::Expression::Base) ? obj : Regexp::Parser.parse(obj)
    exp.match_length
  end

  # exp  - an object responding to `repetitions`; the result must respond
  #        to `min` and `max`.
  # opts - either `base:` (the fixed length of one repetition), or all of
  #        `base_min:`, `base_max:` and `reify:` (a callable returning a
  #        regexp source String for one repetition).
  #
  # Raises KeyError if `base:` is absent and one of the other keys is missing.
  def initialize(exp, opts = {})
    self.exp_class = exp.class
    self.min_rep = exp.repetitions.min
    self.max_rep = exp.repetitions.max
    if (base = opts[:base])
      self.base_min = base
      self.base_max = base
      # Any `base` characters stand in for one repetition.
      self.reify = ->{ '.' * base }
    else
      self.base_min = opts.fetch(:base_min)
      self.base_max = opts.fetch(:base_max)
      self.reify = opts.fetch(:reify)
    end
  end

  # Yields each matchable length in ascending order.
  #
  # opts - `limit:` caps the number of yielded lengths (default 1000).
  #        The cap is checked after each yield, so the first matchable
  #        length is yielded even for a limit below 1.
  #
  # Returns an Enumerator when no block is given.
  def each(opts = {})
    return enum_for(__method__, opts) unless block_given?
    limit = opts[:limit] || 1000
    yielded = 0
    (min..max).each do |num|
      next unless include?(num)
      yield(num)
      break if (yielded += 1) >= limit
    end
  end

  # Like #each, but without a cap on the number of yielded lengths.
  # Does not terminate on its own when #max is infinite.
  def endless_each
    return enum_for(__method__) unless block_given?
    (min..max).each { |num| yield(num) if include?(num) }
  end

  # Returns true if a string of `length` characters matches #to_re
  # from start to end.
  def include?(length)
    test_regexp.match?('X' * length)
  end

  # Returns true if only a single length can be matched.
  def fixed?
    min == max
  end

  # Returns the smallest matchable length.
  def min
    total_length(min_rep, base_min)
  end

  # Returns the largest matchable length (may be Float::INFINITY).
  def max
    total_length(max_rep, base_max)
  end

  # Returns [min, max].
  def minmax
    [min, max]
  end

  def inspect
    type = exp_class.name.sub('Regexp::Expression::', '')
    "#<#{self.class}<#{type}> min=#{min} max=#{max}>"
  end

  # Returns a Regexp built from the reified base pattern, wrapped in a
  # non-capturing group and quantified with the repetition range. The upper
  # bound is left open when max_rep is infinite.
  def to_re
    /(?:#{reify.call}){#{min_rep},#{max_rep unless max_rep == Float::INFINITY}}/
  end

  private

  attr_accessor :base_min, :base_max, :min_rep, :max_rep, :exp_class, :reify

  # Multiplies a repetition count by the length of one repetition.
  # A zero on either side yields 0, because `0 * Float::INFINITY` would
  # otherwise be NaN and break comparisons and the (min..max) range.
  def total_length(repetitions, base_length)
    return 0 if repetitions.zero? || base_length.zero?
    repetitions * base_length
  end

  # Memoized, fully anchored version of #to_re used by #include?.
  if Regexp.method_defined?(:match?) # ruby >= 2.4
    def test_regexp
      @test_regexp ||= /\A#{to_re}\z/
    end
  else
    # Older rubies lack Regexp#match?, so it is added to this one instance.
    def test_regexp
      @test_regexp ||= /\A#{to_re}\z/.tap { |r| def r.match?(s); !!match(s) end }
    end
  end
end
# Adds a `match_length` method to the expression classes listed below.
# Each implementation returns a MatchLength describing the string lengths
# the expression can match.
module Regexp::Expression
  MatchLength = Regexp::MatchLength

  # One repetition of any of these has a base length of 1.
  [
    CharacterSet,
    CharacterSet::Intersection,
    CharacterSet::IntersectedSequence,
    CharacterSet::Range,
    CharacterType::Base,
    EscapeSequence::Base,
    PosixClass,
    UnicodeProperty::Base,
  ].each do |klass|
    klass.class_eval <<-RUBY, __FILE__, __LINE__ + 1
      def match_length
        MatchLength.new(self, base: 1)
      end
    RUBY
  end

  class Literal
    # The base length is the length of the literal's text.
    def match_length
      MatchLength.new(self, base: text.length)
    end
  end

  class Subexpression
    # The base length range is the sum of the children's minimum and
    # maximum lengths. Child lengths are computed once and shared by min,
    # max and reify; recomputing them per use would double the work at
    # every nesting level.
    def match_length
      lengths = map(&:match_length)
      MatchLength.new(self,
                      base_min: lengths.map(&:min).inject(0, :+),
                      base_max: lengths.map(&:max).inject(0, :+),
                      reify: ->{ lengths.map(&:to_re).join })
    end

    # Match length of this expression's children, computed on a throwaway
    # Root that receives clones of the children and of the quantifier.
    def inner_match_length
      dummy = Regexp::Expression::Root.construct
      dummy.expressions = expressions.map(&:clone)
      dummy.quantifier = quantifier && quantifier.clone
      dummy.match_length
    end
  end

  # For branching expressions the base length ranges from the shortest
  # branch minimum to the longest branch maximum. Branch lengths are
  # computed once and reused for min, max and reify.
  [
    Alternation,
    Conditional::Expression,
  ].each do |klass|
    klass.class_eval <<-RUBY, __FILE__, __LINE__ + 1
      def match_length
        lengths = map(&:match_length)
        MatchLength.new(self,
                        base_min: lengths.map(&:min).min,
                        base_max: lengths.map(&:max).max,
                        reify: ->{ lengths.map(&:to_re).join('|') })
      end
    RUBY
  end

  # These have a base length of 0.
  [
    Anchor::Base,
    Assertion::Base,
    Conditional::Condition,
    FreeSpace,
    Keep::Mark,
  ].each do |klass|
    klass.class_eval <<-RUBY, __FILE__, __LINE__ + 1
      def match_length
        MatchLength.new(self, base: 0)
      end
    RUBY
  end

  class Backreference::Base
    # Uses the match length of the referenced expression without its
    # quantifier.
    #
    # Raises ArgumentError if no referenced expression is set.
    def match_length
      if referenced_expression.nil?
        raise ArgumentError, 'Missing referenced_expression - not parsed?'
      end
      referenced_expression.unquantified_clone.match_length
    end
  end

  class EscapeSequence::CodepointList
    # The base length is the number of codepoints in the list.
    def match_length
      MatchLength.new(self, base: codepoints.count)
    end
  end

  # Special case. Absence group can match 0.. chars, irrespective of content.
  # TODO: in theory, they *can* exclude match lengths with `.`: `(?~.{3})`
  class Group::Absence
    def match_length
      MatchLength.new(self, base_min: 0, base_max: Float::INFINITY, reify: ->{ '.*' })
    end
  end
end
|