File: gc.rb

package info (click to toggle)
ruby-unicode-utils 1.4.0-2
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, bullseye, forky, sid, trixie
  • size: 1,988 kB
  • sloc: ruby: 1,877; makefile: 4
file content (42 lines) | stat: -rw-r--r-- 1,096 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
# -*- encoding: utf-8 -*-

require "unicode_utils/read_cdata"

module UnicodeUtils

  # Lookup table indexed by code point ordinal. UnicodeUtils.gc checks
  # it first and returns the stored value when the entry is truthy.
  # NOTE(review): built by Impl.read_general_category_per_cp, presumably
  # provided by unicode_utils/read_cdata -- the exact container type is
  # not visible from this file.
  GENERAL_CATEGORY_PER_CP_MAP =
    Impl.read_general_category_per_cp("general_category_per_cp") # :nodoc:

  # Collection of pairs iterated by UnicodeUtils.gc when the per code
  # point table has no entry: element 0 of each pair must respond to
  # cover?(ordinal), element 1 is the value gc returns on a match.
  # NOTE(review): presumably [Range, Symbol] pairs -- confirm against
  # Impl.read_general_category_ranges.
  GENERAL_CATEGORY_RANGES =
    Impl.read_general_category_ranges("general_category_ranges") # :nodoc:

  # Look up the general category of +char+ and return its two letter
  # alias as a symbol. The first letter names the major class, the
  # second letter the subclass within it.
  #
  # See section 4.5 in Unicode 6.0.0.
  #
  # Example:
  #
  #   require "unicode_utils/gc"
  #   UnicodeUtils.gc("A") # => :Lu (Letter, uppercase)
  #
  # The ordinal of +char+ is looked up in GENERAL_CATEGORY_PER_CP_MAP
  # first and then checked against GENERAL_CATEGORY_RANGES. An ordinal
  # found in neither yields :Cn when it lies within 0x0..0x10FFFF and
  # nil when it lies outside that range.
  #
  # See also: UnicodeUtils.general_category, UnicodeUtils.char_type
  def gc(char)
    code_point = char.ord

    # Fast path: a direct per code point entry wins.
    direct_hit = GENERAL_CATEGORY_PER_CP_MAP[code_point]
    return direct_hit if direct_hit

    # Otherwise the first range covering the ordinal decides.
    GENERAL_CATEGORY_RANGES.each do |entry|
      return entry[1] if entry[0].cover?(code_point)
    end

    # Inside the code point range but not listed: Other, not assigned.
    code_point.between?(0x0, 0x10FFFF) ? :Cn : nil
  end
  # Make gc callable as UnicodeUtils.gc; module_function also keeps a
  # private instance-method copy for code that includes the module.
  module_function :gc

end