File: i18n.at

package info (click to toggle)
pspp 2.0.1-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 66,676 kB
  • sloc: ansic: 267,210; xml: 18,446; sh: 5,534; python: 2,881; makefile: 125; perl: 64
file content (131 lines) | stat: -rw-r--r-- 5,729 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
dnl PSPP - a program for statistical analysis.
dnl Copyright (C) 2017 Free Software Foundation, Inc.
dnl
dnl This program is free software: you can redistribute it and/or modify
dnl it under the terms of the GNU General Public License as published by
dnl the Free Software Foundation, either version 3 of the License, or
dnl (at your option) any later version.
dnl
dnl This program is distributed in the hope that it will be useful,
dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
dnl GNU General Public License for more details.
dnl
dnl You should have received a copy of the GNU General Public License
dnl along with this program.  If not, see <http://www.gnu.org/licenses/>.
dnl
# The tests in this section run "i18n-test recode" and compare what it prints
# with the expected converted text.
AT_BANNER([i18n recoding])

# CHECK_I18N_RECODE([TITLE], [FROM-CODING], [TO-CODING],
#                   [FROM-TEXT], [TO-TEXT])
#
# Defines a test group named "convert TITLE" that runs "i18n-test recode" to
# convert FROM-TEXT from FROM-CODING to TO-CODING, and requires the output to
# be TO-TEXT followed by a newline.
#
# TO-TEXT is expanded by the shell's "printf", so it may contain octal
# backslash-escapes; because it is used as the printf format string it must
# not contain "%" or single quotes.  FROM-TEXT is handed to i18n-test verbatim
# inside single quotes (presumably i18n-test decodes the octal escapes
# itself -- confirm against i18n-test).
m4_define([CHECK_I18N_RECODE],
  [AT_SETUP([convert $1])
   AT_KEYWORDS([i18n])

   dnl Hosts that do not know both encodings are meant to be skipped here.
   dnl Autotest skips the rest of a test group when a check exits with
   dnl status 77; presumably that is what i18n-test reports in that case.
   AT_CHECK([i18n-test supports_encodings '$2' '$3'], [0])

   dnl Command substitution strips trailing newlines, so the newline that
   dnl is expected at the end of the output is written literally after the
   dnl closing backquote.
   AT_CHECK_UNQUOTED(
     [i18n-test recode '$2' '$3' '$4'],
     [0],
     [`printf '$5'`
])
   AT_CLEANUP])

# ASCII text must come through unchanged, whether the target encoding is ASCII
# itself or UTF-8.
CHECK_I18N_RECODE([reflexively], [ASCII], [ASCII], [abc], [abc])
CHECK_I18N_RECODE([without any change], [ASCII], [UTF-8], [abc], [abc])

# \242 is the cent sign (U+00A2) in ISO-8859-1; its UTF-8 encoding is the two
# bytes \302\242.  Check the conversion in both directions.
CHECK_I18N_RECODE([from ISO-8859-1 to UTF-8], [ISO-8859-1], [UTF-8],
                  [\242], [\302\242])
CHECK_I18N_RECODE([from UTF-8 to ISO-8859-1], [UTF-8], [ISO-8859-1],
                  [\302\242], [\242])

# 0xc0 == 0300 is invalid in UTF-8
AT_SETUP([convert invalid UTF-8 to ISO-8859-1])
AT_KEYWORDS([i18n])
dnl This test fails on Mac OS 14 Sonoma due to changes in the native
dnl MacOS iconv library.  The check below therefore lets only Linux hosts
dnl continue: every other host, MacOS included, exits with status 77,
dnl which makes Autotest skip the test.
AT_CHECK([case $host in #(
  *-linux*) ;; #(
  *) exit 77
esac])
dnl Skip the test if this host doesn't know the source and target encodings.
AT_CHECK([i18n-test supports_encodings 'UTF-8' 'ISO-8859-1'])
dnl The invalid byte is expected to be replaced by "?" while the valid
dnl characters around it are converted normally.
AT_CHECK_UNQUOTED([i18n-test recode 'UTF-8' 'ISO-8859-1' 'xy\300z'], [0], [`printf 'xy?z'`
])
AT_CLEANUP

# 0xc2 == 0302 is the first byte of a 2-byte UTF-8 sequence, so input that
# ends right after it is truncated.  The incomplete character is expected to
# come out as "?".
CHECK_I18N_RECODE([truncated UTF-8 to ISO-8859-1], [UTF-8], [ISO-8859-1],
                  [xy\302], [xy?])

# Checks for a bug that caused the last character to be dropped in conversions
# from encodings that have combining diacritics (e.g. windows-1258).  The input
# is five plain ASCII vowels, so the output must be the same five characters.
CHECK_I18N_RECODE([dropped final character in windows-1258], [windows-1258],
                  [UTF-8], [aeiou], [aeiou])

dnl The input to this test is 7 bytes long and the expected output is 9 bytes,
dnl because \374 and \337 (u-umlaut and sharp s in ISO-8859-1) each become
dnl two bytes in UTF-8.  So it should exercise the E2BIG case.
CHECK_I18N_RECODE([from ISO-8859-1 to UTF-8 with overflow],
                  [ISO-8859-1], [UTF-8],
                  [Tsch\374\337!], [Tsch\303\274\303\237!])

# Neither encoding name given here is expected to exist.  The check requires
# i18n-test to print a warning on stderr, pass the text through unchanged on
# stdout, and still exit with status 0.
AT_SETUP([convert unknown encoding])
AT_KEYWORDS([i18n])
AT_CHECK(
  [i18n-test recode nonexistent1 nonexistent2 asdf],
  [0],
  [asdf
],
  [Warning: cannot create a converter for `nonexistent1' to `nonexistent2': Invalid argument
])
AT_CLEANUP

# The tests in this section run "i18n-test concat" and compare what it prints
# with the expected, possibly shortened, concatenation.
AT_BANNER([i18n concatenation])

# CHECK_I18N_CONCAT([HEAD], [TAIL], [ENCODING], [MAX-LEN], [ANSWER])
#
# Defines a test group that runs "i18n-test concat" on HEAD and TAIL.  The
# result should be HEAD followed by TAIL, with as many characters as necessary
# dropped from HEAD so that the whole would occupy at most MAX-LEN bytes when
# expressed in ENCODING.  The test passes if the output is ANSWER followed by
# a newline.
#
# HEAD, TAIL, and ANSWER are all written in UTF-8.  ANSWER is run through the
# shell's "printf" so that backslash-escapes work.  (Hex escapes are not
# portable; use octal escapes instead.)  Because ANSWER is used as the printf
# format string it must not contain "%" or single quotes.  HEAD and TAIL are
# passed to i18n-test verbatim inside single quotes (presumably i18n-test
# decodes their escapes itself -- confirm against i18n-test).
#
# The test title mentions TAIL only when TAIL is nonempty.
m4_define([CHECK_I18N_CONCAT],
  [AT_SETUP([truncate "$1"m4_if([$2], [], [], [ + "$2"]) to $4 bytes in $3])
   AT_KEYWORDS([i18n])

   dnl Hosts that do not know the encoding are meant to be skipped here.
   dnl Autotest skips the rest of a test group when a check exits with
   dnl status 77; presumably that is what i18n-test reports in that case.
   AT_CHECK([i18n-test supports_encodings '$3'], [0])

   dnl Command substitution strips trailing newlines, so the newline that
   dnl is expected at the end of the output is written literally after the
   dnl closing backquote.
   AT_CHECK_UNQUOTED([i18n-test concat '$1' '$2' '$3' '$4'],
                     [0],
                     [`printf '$5'`
])
   AT_CLEANUP])

# Plain ASCII, where every character is a single byte in UTF-8.
#
# Empty HEAD and/or TAIL, and a HEAD that either fits or must be cut down to
# MAX-LEN.  Only HEAD is ever shortened: the last case expects a TAIL longer
# than MAX-LEN to be returned whole.
CHECK_I18N_CONCAT([abc], [], [UTF-8], [6], [abc])
CHECK_I18N_CONCAT([], [xyz], [UTF-8], [6], [xyz])
CHECK_I18N_CONCAT([], [], [UTF-8], [6], [])
CHECK_I18N_CONCAT([abcdefghij], [], [UTF-8], [6], [abcdef])
CHECK_I18N_CONCAT([], [tuvwxyz], [UTF-8], [6], [tuvwxyz])

# Both parts nonempty: HEAD gives up just enough characters for TAIL to fit,
# down to nothing when TAIL alone fills MAX-LEN.
CHECK_I18N_CONCAT([abc], [xyz], [UTF-8], [6], [abcxyz])
CHECK_I18N_CONCAT([abcd], [xyz], [UTF-8], [6], [abcxyz])
CHECK_I18N_CONCAT([abc], [uvwxyz], [UTF-8], [6], [uvwxyz])

# x in a box ( x⃞ ) is U+0078, U+20DE, 4 bytes in UTF-8, and one grapheme
# cluster.  The expected answers show the cluster being kept or dropped as a
# unit: HEAD disappears entirely until MAX-LEN reaches 5, which is the 4-byte
# cluster plus the 1-byte TAIL.
CHECK_I18N_CONCAT([x\342\203\236], [y], [UTF-8], [0], [y])
CHECK_I18N_CONCAT([x\342\203\236], [y], [UTF-8], [1], [y])
CHECK_I18N_CONCAT([x\342\203\236], [y], [UTF-8], [2], [y])
CHECK_I18N_CONCAT([x\342\203\236], [y], [UTF-8], [3], [y])
CHECK_I18N_CONCAT([x\342\203\236], [y], [UTF-8], [4], [y])
CHECK_I18N_CONCAT([x\342\203\236], [y], [UTF-8], [5], [x\342\203\236y])

# éèä is only 3 bytes in ISO-8859-1.
#
# The arguments and answers below are written in UTF-8, where each accented
# letter takes two bytes, but MAX-LEN is measured in ISO-8859-1, where each
# takes one.  With the 3-byte TAIL, each byte of MAX-LEN beyond 3 therefore
# lets one more character of HEAD through.
CHECK_I18N_CONCAT([\303\251\303\250\303\244], [xyz], [ISO-8859-1], [0], [xyz])
CHECK_I18N_CONCAT([\303\251\303\250\303\244], [xyz], [ISO-8859-1], [1], [xyz])
CHECK_I18N_CONCAT([\303\251\303\250\303\244], [xyz], [ISO-8859-1], [2], [xyz])
CHECK_I18N_CONCAT([\303\251\303\250\303\244], [xyz], [ISO-8859-1], [3], [xyz])
CHECK_I18N_CONCAT([\303\251\303\250\303\244], [xyz], [ISO-8859-1], [4],
                  [\303\251xyz])
CHECK_I18N_CONCAT([\303\251\303\250\303\244], [xyz], [ISO-8859-1], [5],
                  [\303\251\303\250xyz])
CHECK_I18N_CONCAT([\303\251\303\250\303\244], [xyz], [ISO-8859-1], [6],
                  [\303\251\303\250\303\244xyz])