File: character_set_methods.rb

package info (click to toggle)
ruby-character-set 1.8.0-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 416 kB
  • sloc: ansic: 2,597; ruby: 1,290; makefile: 7; sh: 4
file content (127 lines) | stat: -rw-r--r-- 3,548 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
class CharacterSet
  module RubyFallback
    # Pure-Ruby implementations of the codepoint-level helper methods.
    #
    # The methods below call +include?+, +each+, +<<+, +dup+, +keep_if+,
    # +to_a+ and Enumerable methods (+count+, +any?+, +map+) on +self+, and
    # +new+ on the class, so the including class must offer a Set-like,
    # Enumerable interface whose members are Integer codepoints.
    module CharacterSetMethods
      # Constructors; these call +new+, so they are meant to be extended
      # onto the set class.
      module ClassMethods
        # Builds a set holding every member of the given ranges.
        #
        # @param ranges [Array<#to_a>] objects whose +to_a+ yields codepoints
        # @return [Object] result of +new+ called with the flattened members
        def from_ranges(*ranges)
          new(ranges.flat_map(&:to_a))
        end

        # Builds a set holding every codepoint that occurs in +str+.
        #
        # @param str [#codepoints] a String in any encoding that can be
        #   transcoded to UTF-8
        # @return [Object] a new set
        # @raise [ArgumentError] if +str+ does not respond to +codepoints+
        def of_string(str)
          raise ArgumentError, 'pass a String' unless str.respond_to?(:codepoints)
          str.encode('utf-8').each_codepoint.with_object(new) { |cp, set| set << cp }
        end
      end

      # Returns a new set with every codepoint from 0 to +upto+ that is NOT
      # a member of this set.
      #
      # @param include_surrogates [Boolean] when false, codepoints
      #   0xD800..0xDFFF are never added to the result
      # @param upto [Integer] highest codepoint to consider (inclusive)
      # @return [Object] a new instance of the receiver's class
      def inversion(include_surrogates: false, upto: 0x10FFFF)
        new_set = self.class.new
        0.upto(upto) do |cp|
          next unless include_surrogates || cp > 0xDFFF || cp < 0xD800
          new_set << cp unless include?(cp)
        end
        new_set
      end

      # Returns a copy of this set that additionally contains the
      # case-swapped counterpart of each member, as computed by
      # String#swapcase. Counterparts that span more than one codepoint
      # are not added.
      #
      # @return [Object] a dup of the receiver with the extra members
      def case_insensitive
        new_set = dup
        each do |cp|
          # Surrogates cannot be encoded in UTF-8 (Integer#chr raises a
          # RangeError for them) and have no case mapping, so skip them
          # instead of crashing on sets that contain them.
          next if cp.between?(0xD800, 0xDFFF)

          swapped_cps = cp.chr('utf-8').swapcase.codepoints
          new_set << swapped_cps[0] if swapped_cps.size == 1
        end
        new_set
      end

      # Returns the result of RangeCompressor.compress for this set.
      # The 'range_compressor' dependency is loaded on demand through
      # CharacterSet.require_optional_dependency (defined outside this file).
      def ranges
        CharacterSet.require_optional_dependency('range_compressor', __method__)
        RangeCompressor.compress(self)
      end

      # Returns one random element, or +count+ random elements, of
      # +to_a(true)+.
      # NOTE(review): the +true+ argument presumably makes +to_a+ return
      # characters instead of codepoints - confirm against the host's +to_a+.
      #
      # @param count [Integer, nil] number of elements; nil for a single one
      def sample(count = nil)
        count.nil? ? to_a(true).sample : to_a(true).sample(count)
      end

      # Counts the codepoints of +string+ that are members of this set.
      #
      # @param string [String]
      # @return [Integer]
      # @raise [ArgumentError] if +string+ does not respond to +codepoints+
      def count_in(string)
        utf8_str!(string).each_codepoint.count { |cp| include?(cp) }
      end

      # Tells whether every codepoint of +string+ is a member of this set.
      # Returns true for an empty string.
      #
      # @param string [String]
      # @return [Boolean]
      # @raise [ArgumentError] if +string+ does not respond to +codepoints+
      def cover?(string)
        utf8_str!(string).each_codepoint { |cp| return false unless include?(cp) }
        true
      end

      # Returns a copy of +string+ without the codepoints that are members
      # of this set. The result has the encoding of +string+.
      #
      # @param string [String]
      # @return [String]
      # @raise [ArgumentError] if +string+ does not respond to +codepoints+
      def delete_in(string)
        utf8_str!(string).each_codepoint.with_object('') do |cp, new_str|
          new_str << cp unless include?(cp)
        end.encode(string.encoding)
      end

      # In-place variant of #delete_in.
      #
      # @param string [String] modified via String#replace
      # @return [String, nil] +string+, or nil if nothing was removed
      def delete_in!(string)
        result = delete_in(string)
        # Deleting can only shorten the string, so equal size means no change.
        return nil if result.size == string.size

        string.replace(result)
      end

      # Returns a copy of +string+ reduced to the codepoints that are
      # members of this set. The result has the encoding of +string+.
      #
      # @param string [String]
      # @return [String]
      # @raise [ArgumentError] if +string+ does not respond to +codepoints+
      def keep_in(string)
        utf8_str!(string).each_codepoint.with_object('') do |cp, new_str|
          new_str << cp if include?(cp)
        end.encode(string.encoding)
      end

      # In-place variant of #keep_in.
      #
      # @param string [String] modified via String#replace
      # @return [String, nil] +string+, or nil if nothing was removed
      def keep_in!(string)
        result = keep_in(string)
        # Filtering can only shorten the string, so equal size means no change.
        return nil if result.size == string.size

        string.replace(result)
      end

      # Returns all characters of +string+ that are members of this set,
      # in order of occurrence and including repetitions, as UTF-8 Strings.
      #
      # @param string [String]
      # @return [Array<String>]
      # @raise [ArgumentError] if +string+ does not respond to +codepoints+
      def scan(string)
        utf8_str!(string).each_codepoint.with_object([]) do |cp, arr|
          arr.push(cp.chr('utf-8')) if include?(cp)
        end
      end

      # Tells whether at least one codepoint of +string+ is a member of
      # this set. Returns false for an empty string.
      #
      # @param string [String]
      # @return [Boolean]
      # @raise [ArgumentError] if +string+ does not respond to +codepoints+
      def used_by?(string)
        utf8_str!(string).each_codepoint { |cp| return true if include?(cp) }
        false
      end

      # Returns a copy of this set limited to members within from..upto.
      #
      # @param from [Integer] lowest codepoint to keep (inclusive)
      # @param upto [Integer] highest codepoint to keep (inclusive)
      # @return [Object] a dup of the receiver after +keep_if+
      def section(from:, upto: 0x10FFFF)
        dup.keep_if { |cp| cp >= from && cp <= upto }
      end

      # Counts the members within from..upto (both inclusive).
      #
      # @return [Integer]
      def count_in_section(from:, upto: 0x10FFFF)
        count { |cp| cp >= from && cp <= upto }
      end

      # Tells whether any member lies within from..upto (both inclusive).
      #
      # @return [Boolean]
      def section?(from:, upto: 0x10FFFF)
        any? { |cp| cp >= from && cp <= upto }
      end

      # Returns the share of members that lie within from..upto.
      # The division is done in Float, so an empty set yields NaN.
      #
      # @return [Float]
      def section_ratio(from:, upto: 0x10FFFF)
        # Counting in place avoids building a throwaway copy of the set.
        count_in_section(from: from, upto: upto) / count.to_f
      end

      # Returns the numbers of all planes (blocks of 0x10000 codepoints)
      # that contain at least one member, in the order in which they are
      # first met while iterating this set.
      #
      # @return [Array<Integer>]
      def planes
        # cp >> 16 equals the integer division cp / 0x10000.
        map { |cp| cp >> 16 }.uniq
      end

      # Returns a copy of this set limited to the members in plane +num+.
      #
      # @param num [Integer] plane number, 0 to 16
      # @return [Object] see #section
      # @raise [ArgumentError] if +num+ is not between 0 and 16
      def plane(num)
        validate_plane_number(num)
        section(from: (num * 0x10000), upto: ((num + 1) * 0x10000) - 1)
      end

      # Tells whether any codepoint of plane +num+ is a member of this set.
      #
      # @param num [Integer] plane number, 0 to 16
      # @return [Boolean]
      # @raise [ArgumentError] if +num+ is not between 0 and 16
      def member_in_plane?(num)
        validate_plane_number(num)
        ((num * 0x10000)...((num + 1) * 0x10000)).any? { |cp| include?(cp) }
      end

      private

      # Raises ArgumentError unless +num+ is between 0 and 16 (inclusive).
      def validate_plane_number(num)
        raise ArgumentError, 'plane must be between 0 and 16' unless num.between?(0, 16)
      end

      # Returns +obj+ transcoded to UTF-8.
      # Raises ArgumentError unless +obj+ responds to +codepoints+.
      def utf8_str!(obj)
        raise ArgumentError, 'pass a String' unless obj.respond_to?(:codepoints)
        obj.encode('utf-8')
      end
    end
  end
end