1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206
|
#
# Various methods shared by the pure-Ruby and the extended implementation.
#
# Many of these methods are hotspots, so they are defined directly on
# the including classes for better performance.
#
class CharacterSet
  # Mixin that installs the shared class-level and instance-level API on
  # whichever class includes it. The methods are not defined on this module;
  # they are evaluated directly into the including class (see `included`).
  module SharedMethods
    # Inclusion hook. Defines all shared methods directly on +klass+ by
    # evaluating Ruby source strings with `class_eval`.
    #
    # The generated code relies on methods that are not defined here and must
    # be supplied by the including class (e.g. `merge`, `clear`, `ranges`,
    # `to_a`, `each`, `delete_if`, `keep_if`, `from_ranges`, `of_string` and
    # the `ext_*` adapter targets).
    #
    # @param klass [Class] the class that includes this module
    # @return [void]
    def self.included(klass)
      klass.class_eval <<-RUBY, __FILE__, __LINE__ + 1
        # Raised when an optional dependency cannot be loaded.
        LoadError = Class.new(::LoadError)

        class << self
          # Builds a new set from the given arguments.
          def [](*args)
            new(Array(args))
          end

          # Merges the sets obtained from each argument: Regexp arguments go
          # through `of_regexp`, all others through `of_string`. Without any
          # arguments, a set built by a plain `new` is returned.
          def of(*args)
            args.map do |arg|
              arg.is_a?(Regexp) ? of_regexp(arg) : of_string(arg)
            end.reduce(:merge) || new
          end

          # Builds a set from a bracket expression string. If the string
          # starts with '[^', the inversion of the parsed set is returned.
          def parse(string)
            codepoints = Parser.codepoints_from_bracket_expression(string)
            result = new(codepoints)
            string.start_with?('[^') ? result.inversion : result
          end

          # Builds a set from the ranges matched by the named property.
          # Needs the optional dependency 'regexp_property_values'.
          def of_property(property_name)
            require_optional_dependency('regexp_property_values', __method__)

            property = RegexpPropertyValues[property_name.to_s]
            from_ranges(*property.matched_ranges)
          end

          # Builds a set from a Regexp by parsing it and converting the
          # resulting expression tree.
          # Needs the optional dependency 'regexp_parser'.
          def of_regexp(regexp)
            require_optional_dependency('regexp_parser', __method__)

            root = ::Regexp::Parser.parse(regexp)
            of_expression(root)
          end

          # Converts an already parsed expression into a set of this class.
          def of_expression(expression)
            ExpressionConverter.convert(expression, self)
          end

          # Requires the library called +name+ once and remembers the success,
          # so later calls skip the `require`. A failing `require` raises the
          # LoadError subclass defined above, naming the calling +method+.
          def require_optional_dependency(name, method)
            required_optional_dependencies[name] ||= begin
              require name
              true
            rescue ::LoadError
              raise LoadError, 'You must install the optional dependency '\
                               "'\#{name}' to use the method `\#{method}'."
            end
          end

          # Memo of optional dependencies that were required successfully.
          def required_optional_dependencies
            @required_optional_dependencies ||= {}
          end
        end # class << self

        # Fills the new set with the codepoints extracted from +enumerable+.
        def initialize(enumerable = [])
          merge(Parser.codepoints_from_enumerable(enumerable))
        end

        # Replaces the contents of this set with those of +enum+. Anything
        # that is not exactly an Array, CharacterSet or Range is first
        # converted to a set of the same class as the receiver.
        def replace(enum)
          unless [Array, CharacterSet, Range].include?(enum.class)
            enum = self.class.new(enum)
          end
          clear
          merge(enum)
        end

        # CharacterSet-specific conversion methods

        # Intersection of this set with the `assigned` set of its class.
        def assigned_part
          self & self.class.assigned
        end

        # This set without the members of the `surrogate` set of its class.
        def valid_part
          self - self.class.surrogate
        end

        # CharacterSet-specific stringification methods

        # Delegates to Writer.write with the ranges of this set; +opts+ and
        # the block are passed through unchanged.
        def to_s(opts = {}, &block)
          Writer.write(ranges, opts, &block)
        end

        def to_s_with_surrogate_ranges
          Writer.write_surrogate_ranges(bmp_part.ranges, astral_part.ranges)
        end

        def to_s_with_surrogate_alternation
          Writer.write_surrogate_alternation(bmp_part.ranges, astral_part.ranges)
        end

        # Returns a String of +length+ characters, each picked from this set
        # via SecureRandom.
        def secure_token(length = 32)
          CharacterSet.require_optional_dependency('securerandom', __method__)
          cps = to_a
          len = cps.count
          1.upto(length).map { cps[SecureRandom.random_number(len)] }.pack('U*')
        end
        alias random_token secure_token

        # Shows up to five members plus the size. The class name in the output
        # is that of the including class, fixed when these methods are defined.
        def inspect
          len = length
          "#<#{klass.name}: {\#{first(5) * ', '}\#{'...' if len > 5}} (size: \#{len})>"
        end

        # C-extension adapter methods. Need overriding in pure fallback.
        # Parsing kwargs in C is slower, verbose, and kinda deprecated.

        def inversion(include_surrogates: false, upto: 0x10FFFF)
          ext_inversion(include_surrogates, upto)
        end

        def section(from:, upto: 0x10FFFF)
          ext_section(from, upto)
        end

        def count_in_section(from:, upto: 0x10FFFF)
          ext_count_in_section(from, upto)
        end

        def section?(from:, upto: 0x10FFFF)
          ext_section?(from, upto)
        end

        def section_ratio(from:, upto: 0x10FFFF)
          ext_section_ratio(from, upto)
        end

        #
        # The following methods are here for `Set` compatibility, but they are
        # comparatively slow. Prefer others.
        #

        # Replaces every member with the result of the block. Returns a sized
        # Enumerator when no block is given.
        def map!
          block_given? or return enum_for(__method__) { size }
          arr = []
          each { |cp| arr << yield(cp) }
          replace(arr)
        end
        alias collect! map!

        # Like `delete_if`, but returns nil when the size did not change.
        def reject!(&block)
          block_given? or return enum_for(__method__) { size }
          old_size = size
          delete_if(&block)
          self if size != old_size
        end

        # Like `keep_if`, but returns nil when the size did not change.
        def select!(&block)
          block_given? or return enum_for(__method__) { size }
          old_size = size
          keep_if(&block)
          self if size != old_size
        end
        alias filter! select!

        # Groups the members by block result into a Hash whose values are
        # sets of the same class as the receiver.
        def classify
          block_given? or return enum_for(__method__) { size }
          each_with_object({}) { |cp, h| (h[yield(cp)] ||= self.class.new).add(cp) }
        end

        # Delegates to the `divide` implementation of the Ruby fallback Set,
        # which is loaded on demand.
        def divide(&func)
          require 'character_set/ruby_fallback'
          CharacterSet::RubyFallback::Set.new(to_a).divide(&func)
        end

        def join(separator = '')
          to_a(true).join(separator)
        end
      RUBY

      # CharacterSet-specific section methods
      {
        ascii: 0..0x7F,
        bmp: 0..0xFFFF,
        astral: 0x10000..0x10FFFF,
      }.each do |section_name, range|
        # Inclusive range of all codepoints that lie OUTSIDE of the section.
        # `<section>_part` below passes range.begin and range.end as inclusive
        # bounds, so the complement must start one past the end (or stop one
        # before the beginning). Reusing the boundary codepoint itself would
        # make e.g. a set containing only U+007F fail `ascii_only?`.
        outside =
          if range.begin.zero?
            (range.end + 1)..0x10FFFF
          else
            0..(range.begin - 1)
          end

        klass.class_eval <<-RUBY, __FILE__, __LINE__ + 1
          def #{section_name}_part
            section(from: #{range.begin}, upto: #{range.end})
          end

          def #{section_name}_part?
            section?(from: #{range.begin}, upto: #{range.end})
          end

          def #{section_name}_only?
            !section?(from: #{outside.begin}, upto: #{outside.end})
          end

          def #{section_name}_ratio
            section_ratio(from: #{range.begin}, upto: #{range.end})
          end
        RUBY
      end
    end # self.included
  end # SharedMethods
end
|