1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69
|
#! /usr/local/bin/ruby -KU
# -*- coding: utf-8 -*-
require 'unicode'
## Dump a string as a list of Unicode code point labels.
class String
  # Returns an Array holding one "U+XXXX" label per code point of the
  # receiver, in order. Each label is the code point in uppercase hex,
  # zero-padded to at least four digits (e.g. "a".udump => ["U+0061"]).
  # An empty string yields an empty Array.
  def udump
    # unpack("U*") only ever yields Integer code points, so every element
    # can be formatted directly; no per-element type check is needed.
    unpack("U*").map { |code_point| format("U+%04X", code_point) }
  end
end
# Demo driver: prints a heading for each topic, then the results of the
# corresponding Unicode module calls (most of them as udump label lists).

print "Canonical decomposition vs compatibility decomposition\n"
# The same input goes through both decompositions; only the canonical
# result is passed through udump, the compatibility result is printed as-is.
p Unicode.decompose("⑽ o\xef\xac\x83ce").udump
p Unicode.decompose_compat("⑽ o\xef\xac\x83ce")

print "Canonical equivalent vs Compatibility equivalent\n"
# Two strcmp calls followed by one strcmp_compat call on kana strings.
p Unicode.strcmp("ガ", "ガ")
p Unicode.strcmp("ガ", "ガ")
p Unicode.strcmp_compat("ガ", "ガ")

print "Decomposition/composition\n"
# The same four code point sequences are run through normalize_D first,
# then through normalize_C, preserving the original output order.
sequences = [
  [0x63, 0x301, 0x327],
  [0x63, 0x327, 0x301],
  [0x107, 0x327],
  [0xe7, 0x301]
]
sequences.each { |code_points| p Unicode.normalize_D(code_points.pack("U*")).udump }
sequences.each { |code_points| p Unicode.normalize_C(code_points.pack("U*")).udump }

print "Kana Normalization\n"
# One kana string through each of the four normalize_* variants.
p Unicode.normalize_D("ガガ").udump
p Unicode.normalize_C("ガガ").udump
p Unicode.normalize_KD("ガガ").udump
p Unicode.normalize_KC("ガガ").udump

print "Hangul\n"
# Raw dump first, then the normalize_D and normalize_C results.
p "요시담".udump
p Unicode.normalize_D("요시담").udump
p Unicode.normalize_C("요시담").udump

print "Composition Exclusion\n"
# For each single code point: print its heading, then dump the
# normalize_D and normalize_C results.
[
  [" ANGSTROM SIGN [U+212B]\n", 0x212b],
  [" LATIN CAPITAL LETTER A WITH RING ABOVE [U+00C5]\n", 0x00c5]
].each do |heading, code_point|
  print heading
  char = [code_point].pack("U")
  p Unicode.normalize_D(char).udump
  p Unicode.normalize_C(char).udump
end

print "Case conversion\n"
# upcase/downcase results are passed through normalize_C before dumping;
# the capitalize result is dumped directly.
lower_input = [0x63, 0x301, 0x327, 0xff41].pack("U*")
upper_input = [0x43, 0x301, 0x327, 0xff21].pack("U*")
mixed_input = [0x1f1, 0x41, 0x61, 0xff21].pack("U*")
p Unicode.normalize_C(Unicode.upcase(lower_input)).udump
p Unicode.normalize_C(Unicode.downcase(upper_input)).udump
p Unicode.capitalize(mixed_input).udump
## Local variables:
## coding: utf-8
## End:
|