1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127
|
class CharacterSet
  module RubyFallback
    # Codepoint-oriented API of CharacterSet, implemented in plain Ruby.
    #
    # The methods below rely on the including class to provide the usual
    # set/enumerable primitives they call: `include?`, `<<`, `each`, `dup`,
    # `keep_if`, `count`, `any?`, `map` and a `to_a` that accepts one
    # positional argument (see #sample).
    module CharacterSetMethods
      # Constructors, meant to be made available on the set class itself
      # (they call `new`).
      module ClassMethods
        # Builds a set containing every member of all given ranges.
        #
        # @param ranges [Array<#to_a>] e.g. `0x41..0x5A, 0x61..0x7A`
        # @return [Object] a new instance of the receiving class
        def from_ranges(*ranges)
          new(ranges.flat_map(&:to_a))
        end

        # Builds a set from the codepoints used in +str+.
        #
        # @param str [#codepoints] the source string (transcoded to UTF-8)
        # @return [Object] a new instance of the receiving class
        # @raise [ArgumentError] if +str+ does not respond to `codepoints`
        def of_string(str)
          raise ArgumentError, 'pass a String' unless str.respond_to?(:codepoints)

          str.encode('utf-8').each_codepoint.with_object(new) { |cp, set| set << cp }
        end
      end

      # Returns a new set with every codepoint in 0..upto that is NOT a
      # member of this set.
      #
      # @param include_surrogates [Boolean] when false (default), codepoints
      #   0xD800..0xDFFF are never added to the result
      # @param upto [Integer] highest codepoint to consider
      # @return [Object] a new instance of the receiver's class
      def inversion(include_surrogates: false, upto: 0x10FFFF)
        (0..upto).each_with_object(self.class.new) do |cp, inverted|
          next if !include_surrogates && cp.between?(0xD800, 0xDFFF)

          inverted << cp unless include?(cp)
        end
      end

      # Returns a copy of this set that additionally contains the
      # swapped-case counterpart of each member, whenever that counterpart
      # is a single codepoint.
      #
      # @return [Object] a new set; the receiver is not modified
      def case_insensitive
        new_set = dup
        each do |cp|
          # Surrogates cannot be turned into a UTF-8 character
          # (Integer#chr raises RangeError), so they are kept as they are.
          next if cp.between?(0xD800, 0xDFFF)

          swapped_cps = cp.chr('utf-8').swapcase.codepoints
          # Multi-codepoint results (e.g. "ß" -> "SS") cannot be represented
          # as a single set member and are skipped.
          new_set << swapped_cps[0] if swapped_cps.size == 1
        end
        new_set
      end

      # Passes this set to RangeCompressor.compress after asking
      # CharacterSet to load the optional 'range_compressor' dependency.
      # NOTE(review): presumably returns an Array of Ranges - confirm
      # against the range_compressor documentation.
      def ranges
        CharacterSet.require_optional_dependency('range_compressor', __method__)
        RangeCompressor.compress(self)
      end

      # Picks random members via Array#sample.
      #
      # Members are taken from `to_a(true)`, which is defined outside this
      # module (presumably it yields characters instead of codepoints -
      # TODO confirm).
      #
      # @param count [Integer, nil] number of samples; nil for a single one
      # @return [Object, Array] one member if +count+ is nil, else an Array
      def sample(count = nil)
        members = to_a(true)
        count.nil? ? members.sample : members.sample(count)
      end

      # @param string [#codepoints]
      # @return [Integer] how many codepoints of +string+ are in this set
      # @raise [ArgumentError] if +string+ does not respond to `codepoints`
      def count_in(string)
        utf8_str!(string).each_codepoint.count { |cp| include?(cp) }
      end

      # @param string [#codepoints]
      # @return [Boolean] true if every codepoint of +string+ is in this set
      #   (true for an empty string)
      # @raise [ArgumentError] if +string+ does not respond to `codepoints`
      def cover?(string)
        utf8_str!(string).each_codepoint.all? { |cp| include?(cp) }
      end

      # Returns a copy of +string+ without the codepoints in this set.
      #
      # @param string [String]
      # @return [String] new string in the encoding of +string+
      # @raise [ArgumentError] if +string+ does not respond to `codepoints`
      def delete_in(string)
        # Explicit mutable UTF-8 buffer: a '' literal would be frozen under
        # `frozen_string_literal: true`.
        buffer = String.new(encoding: Encoding::UTF_8)
        utf8_str!(string).each_codepoint { |cp| buffer << cp unless include?(cp) }
        buffer.encode(string.encoding)
      end

      # In-place variant of #delete_in.
      #
      # @param string [String] modified in place
      # @return [String, nil] +string+, or nil if nothing was removed
      def delete_in!(string)
        result = delete_in(string)
        string.replace(result) unless result.size == string.size
      end

      # Returns a copy of +string+ with only the codepoints in this set.
      #
      # @param string [String]
      # @return [String] new string in the encoding of +string+
      # @raise [ArgumentError] if +string+ does not respond to `codepoints`
      def keep_in(string)
        # See #delete_in for why the buffer is not a string literal.
        buffer = String.new(encoding: Encoding::UTF_8)
        utf8_str!(string).each_codepoint { |cp| buffer << cp if include?(cp) }
        buffer.encode(string.encoding)
      end

      # In-place variant of #keep_in.
      #
      # @param string [String] modified in place
      # @return [String, nil] +string+, or nil if nothing was removed
      def keep_in!(string)
        result = keep_in(string)
        string.replace(result) unless result.size == string.size
      end

      # @param string [#codepoints]
      # @return [Array<String>] each character of +string+ whose codepoint
      #   is in this set, as UTF-8 strings, in order of appearance
      # @raise [ArgumentError] if +string+ does not respond to `codepoints`
      def scan(string)
        utf8_str!(string).each_codepoint.with_object([]) do |cp, matches|
          matches.push(cp.chr('utf-8')) if include?(cp)
        end
      end

      # @param string [#codepoints]
      # @return [Boolean] true if any codepoint of +string+ is in this set
      # @raise [ArgumentError] if +string+ does not respond to `codepoints`
      def used_by?(string)
        utf8_str!(string).each_codepoint.any? { |cp| include?(cp) }
      end

      # @param from [Integer] lowest codepoint to keep (inclusive)
      # @param upto [Integer] highest codepoint to keep (inclusive)
      # @return [Object] a copy of this set limited to from..upto
      def section(from:, upto: 0x10FFFF)
        dup.keep_if { |cp| cp >= from && cp <= upto }
      end

      # @param from [Integer] lower bound (inclusive)
      # @param upto [Integer] upper bound (inclusive)
      # @return [Integer] number of members within from..upto
      def count_in_section(from:, upto: 0x10FFFF)
        count { |cp| cp >= from && cp <= upto }
      end

      # @param from [Integer] lower bound (inclusive)
      # @param upto [Integer] upper bound (inclusive)
      # @return [Boolean] true if any member lies within from..upto
      def section?(from:, upto: 0x10FFFF)
        any? { |cp| cp >= from && cp <= upto }
      end

      # @param from [Integer] lower bound (inclusive)
      # @param upto [Integer] upper bound (inclusive)
      # @return [Float] share of members within from..upto; NaN for an
      #   empty set (0 / 0.0)
      def section_ratio(from:, upto: 0x10FFFF)
        # Counting directly avoids building a throwaway copy of the set.
        count_in_section(from: from, upto: upto) / count.to_f
      end

      # @return [Array<Integer>] numbers of the 0x10000-sized planes that
      #   contain at least one member, in order of first appearance
      def planes
        map { |cp| cp / 0x10000 }.uniq
      end

      # @param num [Integer] plane number, 0..16
      # @return [Object] a copy of this set limited to plane +num+
      # @raise [ArgumentError] if +num+ is outside 0..16
      def plane(num)
        validate_plane_number(num)
        section(from: (num * 0x10000), upto: ((num + 1) * 0x10000) - 1)
      end

      # @param num [Integer] plane number, 0..16
      # @return [Boolean] true if any codepoint of plane +num+ is a member
      # @raise [ArgumentError] if +num+ is outside 0..16
      def member_in_plane?(num)
        validate_plane_number(num)
        ((num * 0x10000)...((num + 1) * 0x10000)).any? { |cp| include?(cp) }
      end

      private

      # Raises ArgumentError unless +num+ lies within 0..16.
      def validate_plane_number(num)
        raise ArgumentError, 'plane must be between 0 and 16' unless num.between?(0, 16)
      end

      # Returns +obj+ transcoded to UTF-8; raises ArgumentError for objects
      # that do not respond to `codepoints`.
      def utf8_str!(obj)
        raise ArgumentError, 'pass a String' unless obj.respond_to?(:codepoints)

        obj.encode('utf-8')
      end
    end
  end
end
|