File: predefined_sets.rb

package info (click to toggle)
ruby-character-set 1.8.0-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 416 kB
  • sloc: ansic: 2,597; ruby: 1,290; makefile: 7; sh: 4
file content (42 lines) | stat: -rw-r--r-- 1,349 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
class CharacterSet
  # Lazily-built, memoized accessors for the character sets described by the
  # `.cps` files found in the `predefined_sets` directory next to this file.
  #
  # For every `<name>.cps` file two methods are generated at load time:
  #
  # * `<name>`     - the frozen set built from the file
  # * `non_<name>` - the frozen inversion of that set
  #
  # NOTE(review): the methods call `new` on the receiver, so this module is
  # presumably extended onto CharacterSet itself - confirm against the caller.
  module PredefinedSets
    Dir[File.join(__dir__, 'predefined_sets', '*.cps')].each do |path|
      set_name = File.basename(path, '.cps')

      # The path is embedded via `String#inspect` so that it becomes a properly
      # escaped string literal in the generated code. Interpolating it raw
      # between single quotes breaks (SyntaxError at load time) as soon as the
      # install location contains a quote, and mangles backslash sequences.
      class_eval <<-RUBY, __FILE__, __LINE__ + 1
        def #{set_name}
          @#{set_name} ||= build_from_cps_file(#{path.inspect}).freeze
        end

        def non_#{set_name}
          @non_#{set_name} ||= build_from_cps_file(#{path.inspect}).inversion.freeze
        end
      RUBY
    end

    # Alternative names for some of the generated accessors. `alias` resolves
    # its target when this file is loaded, so the corresponding `.cps` files
    # (any, ascii_letter, bmp, whitespace, surrogate, unicode) must be present.
    alias all                      any
    alias ascii_letters            ascii_letter
    alias basic_multilingual_plane bmp
    alias blank                    whitespace
    alias invalid                  surrogate
    alias valid                    unicode

    # Builds a set from a `.cps` file.
    #
    # Each line of the file is expected to hold two comma-separated hexadecimal
    # codepoints, `start,end`, describing one inclusive range. All ranges are
    # merged into a fresh set created with `new`.
    #
    # @param path [String] location of the `.cps` file to read
    # @return the set containing every codepoint range listed in the file
    # @raise [RuntimeError] when called from a Ractor other than the main one;
    #   the message explains how to pre-load the sets
    def build_from_cps_file(path)
      # Only checked on Rubies that have Ractor at all.
      if defined?(Ractor) && Ractor.current != Ractor.main
        raise <<-EOS.gsub(/^ */, '')
          CharacterSet's predefined sets are lazy-loaded.
          Pre-load them to use them in Ractors. E.g.:

          CharacterSet.ascii # pre-load
          Ractor.new { CharacterSet.ascii.size }.take # => 128
          Ractor.new { 'abc'.keep_character_set(:ascii) }.take # => 'abc'
        EOS
      end

      # `merge` is relied upon to return the set, which keeps the fold going.
      File.readlines(path).inject(new) do |set, line|
        range_start, range_end = line.split(',')
        set.merge((range_start.to_i(16))..(range_end.to_i(16)))
      end
    end
  end
end