File: shared_methods.rb

package info (click to toggle)
ruby-character-set 1.8.0-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 416 kB
  • sloc: ansic: 2,597; ruby: 1,290; makefile: 7; sh: 4
file content (206 lines) | stat: -rw-r--r-- 6,021 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
#
# Various methods shared by the pure-Ruby and the extended implementation.
#
# Many of these methods are hotspots, so they are defined directly on
# the including classes for better performance.
#
class CharacterSet
  # Mixin holding the behaviour that is common to every CharacterSet
  # implementation.
  #
  # The module defines nothing but the `included` hook: when it is included,
  # all methods are evaluated straight into the including class instead of
  # being looked up through the module.
  module SharedMethods
    # Hook run by `include`. Defines the shared class methods, the shared
    # instance methods and the per-section helpers (`ascii_*`, `bmp_*`,
    # `astral_*`) directly on `klass`.
    #
    # NOTE: the heredocs below are interpolated when this hook runs. A plain
    # interpolation is resolved right then; an escaped one (backslash before
    # the hash sign) survives into the generated code and is only resolved
    # when the generated method is called.
    def self.included(klass)
      klass.class_eval <<-RUBY, __FILE__, __LINE__ + 1
        # Raised when an optional dependency cannot be loaded. It subclasses
        # the core LoadError, so rescuing that one still works for callers.
        LoadError = Class.new(::LoadError)

        class << self
          # Shorthand constructor: passes all arguments to `new` as one Array.
          def [](*args)
            new(Array(args))
          end

          # Builds one set per argument (Regexp arguments via `of_regexp`,
          # everything else via `of_string`, which is not defined in this
          # file) and merges them. Returns an empty set without arguments.
          def of(*args)
            args.map do |arg|
              arg.is_a?(Regexp) ? of_regexp(arg) : of_string(arg)
            end.reduce(:merge) || new
          end

          # Builds a set from a bracket expression String. If the String
          # starts with '[^', the inversion of the parsed set is returned.
          def parse(string)
            codepoints = Parser.codepoints_from_bracket_expression(string)
            result = new(codepoints)
            string.start_with?('[^') ? result.inversion : result
          end

          # Builds a set from the ranges matched by a property, looked up by
          # name through the optional `regexp_property_values` dependency.
          def of_property(property_name)
            require_optional_dependency('regexp_property_values', __method__)

            property = RegexpPropertyValues[property_name.to_s]
            from_ranges(*property.matched_ranges)
          end

          # Parses `regexp` with the optional `regexp_parser` dependency and
          # converts the resulting expression tree with `of_expression`.
          def of_regexp(regexp)
            require_optional_dependency('regexp_parser', __method__)

            root = ::Regexp::Parser.parse(regexp)
            of_expression(root)
          end

          # Converts an already parsed expression into a set of this class.
          def of_expression(expression)
            ExpressionConverter.convert(expression, self)
          end

          # Requires the library `name` once and remembers the success, so
          # repeated calls skip the `require`. Raises the class-local
          # LoadError, mentioning `method`, if the library is unavailable.
          def require_optional_dependency(name, method)
            required_optional_dependencies[name] ||= begin
              require name
              true
            rescue ::LoadError
              raise LoadError, 'You must install the optional dependency '\
                               "'\#{name}' to use the method `\#{method}'."
            end
          end

          # Per-class cache of the optional dependencies loaded so far.
          def required_optional_dependencies
            @required_optional_dependencies ||= {}
          end
        end # class << self

        # Fills the new set with the codepoints that Parser extracts from
        # `enumerable`.
        def initialize(enumerable = [])
          merge(Parser.codepoints_from_enumerable(enumerable))
        end

        # Replaces the contents of the set with those of `enum`. Unless the
        # class of `enum` is exactly Array, CharacterSet or Range, it is
        # first converted by building a new set from it.
        def replace(enum)
          unless [Array, CharacterSet, Range].include?(enum.class)
            enum = self.class.new(enum)
          end
          clear
          merge(enum)
        end

        # CharacterSet-specific conversion methods

        # Intersection of this set with the `assigned` set of its class.
        def assigned_part
          self & self.class.assigned
        end

        # This set minus the `surrogate` set of its class.
        def valid_part
          self - self.class.surrogate
        end

        # CharacterSet-specific stringification methods

        # Stringifies the set by handing its ranges, `opts` and the block to
        # Writer.write.
        def to_s(opts = {}, &block)
          Writer.write(ranges, opts, &block)
        end

        # Stringifies the BMP and astral ranges separately through
        # Writer.write_surrogate_ranges.
        def to_s_with_surrogate_ranges
          Writer.write_surrogate_ranges(bmp_part.ranges, astral_part.ranges)
        end

        # Stringifies the BMP and astral ranges separately through
        # Writer.write_surrogate_alternation.
        def to_s_with_surrogate_alternation
          Writer.write_surrogate_alternation(bmp_part.ranges, astral_part.ranges)
        end

        # Returns a String of `length` characters, each picked from the
        # members of this set with SecureRandom.random_number.
        def secure_token(length = 32)
          CharacterSet.require_optional_dependency('securerandom', __method__)
          cps = to_a
          len = cps.count
          1.upto(length).map { cps[SecureRandom.random_number(len)] }.pack('U*')
        end
        alias random_token secure_token

        # Short description showing at most the first five members and the
        # total size. The class name is baked in when the module is included.
        def inspect
          len = length
          "#<#{klass.name}: {\#{first(5) * ', '}\#{'...' if len > 5}} (size: \#{len})>"
        end

        # C-extension adapter methods. Need overriding in pure fallback.
        # Parsing kwargs in C is slower, verbose, and kinda deprecated.
        # Each adapter only turns its keyword arguments into the positional
        # arguments of the matching ext_ method.

        def inversion(include_surrogates: false, upto: 0x10FFFF)
          ext_inversion(include_surrogates, upto)
        end

        def section(from:, upto: 0x10FFFF)
          ext_section(from, upto)
        end

        def count_in_section(from:, upto: 0x10FFFF)
          ext_count_in_section(from, upto)
        end

        def section?(from:, upto: 0x10FFFF)
          ext_section?(from, upto)
        end

        def section_ratio(from:, upto: 0x10FFFF)
          ext_section_ratio(from, upto)
        end

        #
        # The following methods are here for `Set` compatibility, but they are
        # comparatively slow. Prefer others.
        #

        # Replaces every member with the result of the block. Returns a
        # sized Enumerator when called without a block.
        def map!
          block_given? or return enum_for(__method__) { size }
          arr = []
          each { |cp| arr << yield(cp) }
          replace(arr)
        end
        alias collect! map!

        # Deletes the members selected by the block via delete_if. Returns
        # self if the size changed and nil otherwise.
        def reject!(&block)
          block_given? or return enum_for(__method__) { size }
          old_size = size
          delete_if(&block)
          self if size != old_size
        end

        # Keeps only the members selected by the block via keep_if. Returns
        # self if the size changed and nil otherwise.
        def select!(&block)
          block_given? or return enum_for(__method__) { size }
          old_size = size
          keep_if(&block)
          self if size != old_size
        end
        alias filter! select!

        # Groups the members by the block result. Returns a Hash that maps
        # each result to a set (of the same class) of the matching members.
        def classify
          block_given? or return enum_for(__method__) { size }
          each_with_object({}) { |cp, h| (h[yield(cp)] ||= self.class.new).add(cp) }
        end

        # Delegates to `divide` of a CharacterSet::RubyFallback::Set built
        # from the members; the fallback is loaded lazily on first use.
        def divide(&func)
          require 'character_set/ruby_fallback'
          CharacterSet::RubyFallback::Set.new(to_a).divide(&func)
        end

        # Joins the result of `to_a(true)` with `separator`.
        # NOTE(review): `to_a(true)` presumably yields Strings rather than
        # Integers; `to_a` is defined elsewhere, so confirm there.
        def join(separator = '')
          to_a(true).join(separator)
        end
      RUBY

      # CharacterSet-specific section methods

      {
        ascii:  0..0x7F,
        bmp:    0..0xFFFF,
        astral: 0x10000..0x10FFFF,
      }.each do |section_name, range|
        # Inclusive bounds of everything OUTSIDE of the section. Every section
        # above either starts at 0 or ends at 0x10FFFF, so a single contiguous
        # range describes its complement. The bounds must not touch the
        # section itself: `from`/`upto` are inclusive (the `_part` helpers
        # below pass the inclusive range ends), so reusing `range.end` or
        # `range.begin` here would wrongly treat the boundary codepoints
        # (0x7F, 0xFFFF, 0x10000) as lying outside of their own section.
        outside_from, outside_upto =
          range.begin.zero? ? [range.end + 1, 0x10FFFF] : [0, range.begin - 1]

        klass.class_eval <<-RUBY, __FILE__, __LINE__ + 1
          # The members of this set that lie within the section.
          def #{section_name}_part
            section(from: #{range.begin}, upto: #{range.end})
          end

          # Whether at least one member lies within the section.
          def #{section_name}_part?
            section?(from: #{range.begin}, upto: #{range.end})
          end

          # Whether no member lies outside of the section. This is also true
          # if `section?` finds nothing at all, e.g. for an empty set.
          def #{section_name}_only?
            !section?(from: #{outside_from}, upto: #{outside_upto})
          end

          # Result of `section_ratio` for the bounds of the section.
          def #{section_name}_ratio
            section_ratio(from: #{range.begin}, upto: #{range.end})
          end
        RUBY
      end
    end # self.included
  end # SharedMethods
end