File: codepoint.rb

package info (click to toggle)
ruby-unicode-utils 1.4.0-2
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, bullseye, forky, sid, trixie
  • size: 1,988 kB
  • sloc: ruby: 1,877; makefile: 4
file content (66 lines) | stat: -rw-r--r-- 1,545 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
# -*- encoding: utf-8 -*-

require "unicode_utils/char_name"

module UnicodeUtils

  # A Codepoint instance represents a single Unicode code point.
  #
  #   UnicodeUtils::Codepoint.new(0x20ac) => #<U+20AC "€" EURO SIGN utf8:e2,82,ac>
  #
  # Instances are thin wrappers around an Integer; the wrapped value
  # is validated once, on construction.
  class Codepoint

    # The Unicode codespace. Any integer in this range is a Unicode
    # code point.
    RANGE = 0..0x10FFFF

    # The surrogate code points. They belong to the codespace, but
    # they cannot be encoded in UTF-8 (Integer#chr rejects them).
    SURROGATES = 0xD800..0xDFFF

    # Create a Codepoint instance that wraps the given Integer. +int+
    # must be in Codepoint::RANGE.
    #
    # Raises ArgumentError if +int+ is not an Integer or lies outside
    # of the codespace.
    def initialize(int)
      # A numeric Range also reports true for Floats such as 65.5, so
      # the type has to be checked separately from the bounds.
      unless int.is_a?(Integer)
        raise ArgumentError, "#{int.inspect} is not an Integer"
      end
      unless RANGE.include?(int)
        raise ArgumentError, "#{int} not in codespace"
      end
      @int = int
    end

    # Convert to Integer.
    def ord
      @int
    end

    # True if this code point is a surrogate (U+D800..U+DFFF). For
    # such code points #to_s and #hexbytes raise RangeError.
    def surrogate?
      SURROGATES.include?(@int)
    end

    # Format in U+ notation: at least four uppercase hex digits,
    # zero-padded.
    #
    #   Codepoint.new(0xc5).uplus => "U+00C5"
    def uplus
      sprintf('U+%04X', @int)
    end

    # Get the normative Unicode name of this code point.
    #
    # See also: UnicodeUtils.char_name
    def name
      UnicodeUtils.char_name(@int)
    end

    # Convert this code point to an UTF-8 encoded string. Returns a new
    # string on each call and thus it is allowed to mutate the return
    # value.
    #
    # Raises RangeError for surrogate code points, because they have
    # no UTF-8 encoded form (see #surrogate?).
    def to_s
      @int.chr(Encoding::UTF_8)
    end

    # Get the bytes used to encode this code point in UTF-8,
    # hex-formatted (two lowercase hex digits per byte, comma
    # separated).
    #
    #   Codepoint.new(0xe4).hexbytes => "c3,a4"
    #
    # Raises RangeError for surrogate code points, like #to_s.
    def hexbytes
      to_s.bytes.map { |b| sprintf("%02x", b) }.join(",")
    end

    # #<U+... char name utf8-hexbytes>
    #
    # Surrogate code points have neither a character nor UTF-8 bytes
    # to show; they are rendered as #<U+... surrogate name> so that
    # inspect never raises.
    def inspect
      if surrogate?
        "#<#{uplus} surrogate #{name || "nil"}>"
      else
        "#<#{uplus} #{to_s.inspect} #{name || "nil"} utf8:#{hexbytes}>"
      end
    end

  end

end