File: i18n.at

package info (click to toggle)
pspp 0.8.4-1
  • links: PTS
  • area: main
  • in suites: jessie, jessie-kfreebsd
  • size: 35,692 kB
  • ctags: 20,600
  • sloc: ansic: 218,288; sh: 12,890; xml: 11,342; perl: 715; lisp: 597; makefile: 157
file content (106 lines) | stat: -rw-r--r-- 4,711 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
# Tests that convert text from one character encoding to another by running
# "i18n-test recode".
AT_BANNER([i18n recoding])

# CHECK_I18N_RECODE([TITLE], [FROM-CODING], [TO-CODING],
#                   [FROM-TEXT], [TO-TEXT])
#
# Defines a test group titled "convert TITLE" that converts FROM-TEXT from
# FROM-CODING to TO-CODING and checks that the result is TO-TEXT followed by a
# new-line.  The "printf" program is applied to both FROM-TEXT and TO-TEXT to
# allow for backslash-escapes.  (Hex escapes are not portable; use octal
# escapes instead.)
m4_define([CHECK_I18N_RECODE],
  [AT_SETUP([convert $1])
   AT_KEYWORDS([i18n])

   dnl Skip the test if this host doesn't know the source and target encodings.
   AT_CHECK([i18n-test supports_encodings '$2' '$3'])
   dnl The command substitution is double-quoted so that FROM-TEXT reaches
   dnl i18n-test as exactly one argument, even if it is empty or contains
   dnl white space or shell pattern characters.
   AT_CHECK_UNQUOTED([i18n-test recode '$2' '$3' "`printf '$4'`"], [0], [`printf '$5'`
])
   AT_CLEANUP])
     
# Plain ASCII text must come through unchanged, both when the source and
# target encodings are the same and when the target is UTF-8.
CHECK_I18N_RECODE([reflexively], [ASCII], [ASCII], [abc], [abc])
CHECK_I18N_RECODE([without any change], [ASCII], [UTF-8], [abc], [abc])

# U+00A2 is the single byte 0242 in ISO-8859-1 and the two bytes 0302 0242 in
# UTF-8.  Check the conversion in both directions.
CHECK_I18N_RECODE([from ISO-8859-1 to UTF-8], [ISO-8859-1], [UTF-8],
                  [\242], [\302\242])
CHECK_I18N_RECODE([from UTF-8 to ISO-8859-1], [UTF-8], [ISO-8859-1],
                  [\302\242], [\242])

# Malformed input: in both cases below the byte that cannot be decoded is
# expected to come out as "?" while the surrounding text is preserved.
#
# 0xc0 == 0300 is invalid in UTF-8
CHECK_I18N_RECODE([invalid UTF-8 to ISO-8859-1], [UTF-8], [ISO-8859-1],
                  [xy\300z], [xy?z])
# 0xc2 == 0302 is the first byte of a 2-byte UTF-8 sequence
CHECK_I18N_RECODE([truncated UTF-8 to ISO-8859-1], [UTF-8], [ISO-8859-1],
                  [xy\302], [xy?])

# Checks for a bug that caused the last character to be dropped in conversions
# from encodings that have combining diacritics (e.g. windows-1258).  The
# input is plain ASCII, so the expected output is identical to the input.
CHECK_I18N_RECODE([dropped final character in windows-1258], [windows-1258],
                  [UTF-8], [aeiou], [aeiou])

dnl The input to this test is 7 bytes long and the expected output is 9 bytes
dnl (each of the two non-ASCII input bytes becomes a 2-byte UTF-8 sequence),
dnl so it should exercise the E2BIG case.
dnl NOTE(review): E2BIG is presumably the converter's "output buffer too
dnl small" error path; confirm against the recoding implementation.
CHECK_I18N_RECODE([from ISO-8859-1 to UTF-8 with overflow], 
                  [ISO-8859-1], [UTF-8],
                  [Tsch\374\337!], [Tsch\303\274\303\237!])

# An encoding pair for which no converter can be created is not treated as a
# failure: the test expects exit status 0, the input text echoed unchanged on
# stdout, and a warning on stderr.
AT_SETUP([convert unknown encoding])
AT_KEYWORDS([i18n])
AT_CHECK([i18n-test recode nonexistent1 nonexistent2 asdf], [0], [asdf
],
  [Warning: cannot create a converter for `nonexistent1' to `nonexistent2': Invalid argument
])
AT_CLEANUP

# Tests that join two strings under a byte-length limit by running
# "i18n-test concat".
AT_BANNER([i18n concatenation])

# CHECK_I18N_CONCAT([HEAD], [TAIL], [ENCODING], [MAX-LEN], [ANSWER])
#
# Concatenates HEAD and TAIL, omitting as many characters from HEAD as needed
# to make the result come out to no more than MAX-LEN bytes if it was expressed
# in ENCODING, and checks that the answer matches ANSWER.  HEAD, TAIL, and
# ANSWER are all in UTF-8.  The "printf" program is applied to HEAD, TAIL, and
# ANSWER to allow for backslash-escapes.  (Hex escapes are not portable; use
# octal escapes instead.)
#
# Only HEAD is ever shortened: the cases below expect TAIL to be kept whole
# even when TAIL alone is longer than MAX-LEN.
m4_define([CHECK_I18N_CONCAT],
  [AT_SETUP([m4_if([$2], [], [truncate "$1" to $4 bytes in $3],
                             [truncate "$1" + "$2" to $4 bytes in $3])])
   dnl The title above mentions TAIL only when TAIL is nonempty.
   AT_KEYWORDS([i18n])

   dnl Skip the test if this host doesn't know the encoding.
   AT_CHECK([i18n-test supports_encodings '$3'])
   dnl HEAD and TAIL are double-quoted so that each one reaches i18n-test as a
   dnl single argument, even when it is empty.
   AT_CHECK_UNQUOTED(
     [i18n-test concat "`printf '$1'`" "`printf '$2'`" '$3' '$4'], [0],
     [`printf '$5'`
])
   AT_CLEANUP])

# At most one of HEAD and TAIL is nonempty.  Short text is returned as is, an
# empty pair yields an empty result, an overlong HEAD is cut down to MAX-LEN
# bytes, and an overlong TAIL is kept whole.
CHECK_I18N_CONCAT([abc], [], [UTF-8], [6], [abc])
CHECK_I18N_CONCAT([], [xyz], [UTF-8], [6], [xyz])
CHECK_I18N_CONCAT([], [], [UTF-8], [6], [])
CHECK_I18N_CONCAT([abcdefghij], [], [UTF-8], [6], [abcdef])
CHECK_I18N_CONCAT([], [tuvwxyz], [UTF-8], [6], [tuvwxyz])

# Both HEAD and TAIL are nonempty.  HEAD loses just enough trailing characters
# to make room for TAIL, and disappears entirely when TAIL alone fills MAX-LEN.
CHECK_I18N_CONCAT([abc], [xyz], [UTF-8], [6], [abcxyz])
CHECK_I18N_CONCAT([abcd], [xyz], [UTF-8], [6], [abcxyz])
CHECK_I18N_CONCAT([abc], [uvwxyz], [UTF-8], [6], [uvwxyz])

# x in a box ( x⃞ ) is U+0078, U+20DE, 4 bytes in UTF-8, and one grapheme
# cluster.  With the 1-byte TAIL, HEAD is expected to survive only once
# MAX-LEN reaches 5; for anything smaller the whole cluster is dropped rather
# than being split after the "x".
CHECK_I18N_CONCAT([x\342\203\236], [y], [UTF-8], [0], [y])
CHECK_I18N_CONCAT([x\342\203\236], [y], [UTF-8], [1], [y])
CHECK_I18N_CONCAT([x\342\203\236], [y], [UTF-8], [2], [y])
CHECK_I18N_CONCAT([x\342\203\236], [y], [UTF-8], [3], [y])
CHECK_I18N_CONCAT([x\342\203\236], [y], [UTF-8], [4], [y])
CHECK_I18N_CONCAT([x\342\203\236], [y], [UTF-8], [5], [x\342\203\236y])

# éèä is only 3 bytes in ISO-8859-1 (it is 6 bytes in the UTF-8 used to write
# the arguments), so the length limit is applied to the ISO-8859-1 form: each
# byte of MAX-LEN beyond the 3 needed for TAIL admits one more character of
# HEAD.
CHECK_I18N_CONCAT([\303\251\303\250\303\244], [xyz], [ISO-8859-1], [0], [xyz])
CHECK_I18N_CONCAT([\303\251\303\250\303\244], [xyz], [ISO-8859-1], [1], [xyz])
CHECK_I18N_CONCAT([\303\251\303\250\303\244], [xyz], [ISO-8859-1], [2], [xyz])
CHECK_I18N_CONCAT([\303\251\303\250\303\244], [xyz], [ISO-8859-1], [3], [xyz])
CHECK_I18N_CONCAT([\303\251\303\250\303\244], [xyz], [ISO-8859-1], [4], 
                  [\303\251xyz])
CHECK_I18N_CONCAT([\303\251\303\250\303\244], [xyz], [ISO-8859-1], [5],
                  [\303\251\303\250xyz])
CHECK_I18N_CONCAT([\303\251\303\250\303\244], [xyz], [ISO-8859-1], [6],
                  [\303\251\303\250\303\244xyz])