File: test.rb

package info (click to toggle)
ruby-unicode 0.4.4-2
  • links: PTS, VCS
  • area: main
  • in suites: buster, jessie, jessie-kfreebsd, stretch
  • size: 1,776 kB
  • ctags: 129
  • sloc: ansic: 1,612; ruby: 401; makefile: 2
file content (69 lines) | stat: -rw-r--r-- 2,113 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
#! /usr/local/bin/ruby -KU
# -*- coding: utf-8 -*-

require 'unicode'

## dump Unicode string
class String
  ## Dump the receiver as a list of Unicode code point labels.
  ##
  ## The string is decoded with unpack("U*") and each resulting code point is
  ## rendered as "U+XXXX": uppercase hexadecimal, zero-padded to at least
  ## four digits (longer for code points above U+FFFF).
  ##
  ## Returns an Array of Strings; an empty receiver yields [].
  def udump
    # unpack("U*") only ever yields Integers, so every element is formatted
    # directly; no per-element type check is needed.
    unpack("U*").map { |codepoint| format("U+%04X", codepoint) }
  end
end


## Run the same input through Unicode::decompose and
## Unicode::decompose_compat. The first result is printed as a code point
## dump (udump); the second is printed as the raw returned string.
## The escaped bytes \xef\xac\x83 are the UTF-8 encoding of U+FB03.
print "Canonical decomposition vs compatibility decomposition\n"
p Unicode::decompose("⑽ o\xef\xac\x83ce").udump
p Unicode::decompose_compat("⑽ o\xef\xac\x83ce")

## Print the results of Unicode::strcmp (two calls) and
## Unicode::strcmp_compat (one call) on pairs of kana strings.
## NOTE(review): the literals may differ only in normalization form or
## character width, which is not visible when rendered -- verify the exact
## bytes before editing these lines.
print "Canonical equivalent vs Compatibility equivalent\n"
p Unicode::strcmp("ガ", "ガ")
p Unicode::strcmp("ガ", "ガ")
p Unicode::strcmp_compat("ガ", "ガ")

## Dump the normalize_D form of each code point sequence, then the
## normalize_C form of the same sequences, in the same order.
print "Decomposition/composition\n"
sequences = [
  [0x63, 0x301, 0x327],
  [0x63, 0x327, 0x301],
  [0x107, 0x327],
  [0xe7, 0x301]
]
sequences.each do |codepoints|
  p Unicode::normalize_D(codepoints.pack("U*")).udump
end
sequences.each do |codepoints|
  p Unicode::normalize_C(codepoints.pack("U*")).udump
end

## Dump one kana literal under each of the four normalization calls:
## normalize_D, normalize_C, normalize_KD and normalize_KC.
## NOTE(review): the exact code points in the literal (composed vs
## decomposed, full vs half width) cannot be told apart when rendered --
## verify the bytes before editing.
print "Kana Normalization\n"
p Unicode::normalize_D("ガガ").udump
p Unicode::normalize_C("ガガ").udump
p Unicode::normalize_KD("ガガ").udump
p Unicode::normalize_KC("ガガ").udump

## Dump a Hangul literal as written, then its normalize_D and normalize_C
## results, so the three code point lists can be compared side by side.
print "Hangul\n"
p "요시담".udump
p Unicode::normalize_D("요시담").udump
p Unicode::normalize_C("요시담").udump

## For each labelled code point, print the label and then the dumps of its
## normalize_D and normalize_C results.
print "Composition Exclusion\n"
[
  ["   ANGSTROM SIGN [U+212B]\n", 0x212b],
  ["   LATIN CAPITAL LETTER A WITH RING ABOVE [U+00C5]\n", 0x00c5]
].each do |label, codepoint|
  print label
  p Unicode::normalize_D([codepoint].pack("U")).udump
  p Unicode::normalize_C([codepoint].pack("U")).udump
end

## Dump the results of upcase and downcase (each passed through normalize_C)
## and of capitalize on strings built from explicit code points.
print "Case conversion\n"
lowercase_input = [0x63, 0x301, 0x327, 0xff41].pack("U*")
uppercase_input = [0x43, 0x301, 0x327, 0xff21].pack("U*")
mixed_input = [0x1f1, 0x41, 0x61, 0xff21].pack("U*")

upcased = Unicode::upcase(lowercase_input)
p Unicode::normalize_C(upcased).udump

downcased = Unicode::downcase(uppercase_input)
p Unicode::normalize_C(downcased).udump

p Unicode::capitalize(mixed_input).udump


## Local variables:
## coding: utf-8
## End: