1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438
|
#! /bin/sh
# Output a system dependent table of character encoding aliases.
#
# Copyright (C) 2000-2001 Free Software Foundation, Inc.
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU Library General Public License as published
# by the Free Software Foundation; either version 2, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Library General Public License for more details.
#
# You should have received a copy of the GNU Library General Public
# License along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
# USA.
#
# The table consists of lines of the form
# ALIAS CANONICAL
#
# ALIAS is the (system dependent) result of "nl_langinfo (CODESET)".
# ALIAS is compared in a case sensitive way.
#
# CANONICAL is the GNU canonical name for this character encoding.
# It must be an encoding supported by libiconv. Support by GNU libc is
# also desirable. CANONICAL is case insensitive. Usually an upper case
# MIME charset name is preferred.
# The current list of GNU canonical charset names is as follows.
#
# name used by which systems a MIME name?
# ASCII, ANSI_X3.4-1968 glibc solaris freebsd
# ISO-8859-1 glibc aix hpux irix osf solaris freebsd yes
# ISO-8859-2 glibc aix hpux irix osf solaris freebsd yes
# ISO-8859-3 glibc yes
# ISO-8859-4 osf solaris freebsd yes
# ISO-8859-5 glibc aix hpux irix osf solaris freebsd yes
# ISO-8859-6 glibc aix hpux solaris yes
# ISO-8859-7 glibc aix hpux irix osf solaris yes
# ISO-8859-8 glibc aix hpux osf solaris yes
# ISO-8859-9 glibc aix hpux irix osf solaris yes
# ISO-8859-13 glibc
# ISO-8859-15 glibc aix osf solaris freebsd
# KOI8-R glibc solaris freebsd yes
# KOI8-U glibc freebsd yes
# CP437 dos
# CP775 dos
# CP850 aix osf dos
# CP852 dos
# CP855 dos
# CP856 aix
# CP857 dos
# CP861 dos
# CP862 dos
# CP864 dos
# CP865 dos
# CP866 freebsd dos
# CP869 dos
# CP874 win32 dos
# CP922 aix
# CP932 aix win32 dos
# CP943 aix
# CP949 osf win32 dos
# CP950 win32 dos
# CP1046 aix
# CP1124 aix
# CP1129 aix
# CP1250 win32
# CP1251 glibc win32
# CP1252 aix win32
# CP1253 win32
# CP1254 win32
# CP1255 win32
# CP1256 win32
# CP1257 win32
# GB2312 glibc aix hpux irix solaris freebsd yes
# EUC-JP glibc aix hpux irix osf solaris freebsd yes
# EUC-KR glibc aix hpux irix osf solaris freebsd yes
# EUC-TW glibc aix hpux irix osf solaris
# BIG5 glibc aix hpux osf solaris freebsd yes
# BIG5-HKSCS glibc
# GBK aix osf win32 dos
# GB18030 glibc
# SHIFT_JIS hpux osf solaris freebsd yes
# JOHAB glibc win32
# TIS-620 glibc aix hpux osf solaris
# VISCII glibc yes
# HP-ROMAN8 hpux
# HP-ARABIC8 hpux
# HP-GREEK8 hpux
# HP-HEBREW8 hpux
# HP-TURKISH8 hpux
# HP-KANA8 hpux
# DEC-KANJI osf
# DEC-HANYU osf
# UTF-8 glibc aix hpux osf solaris yes
#
# Note: Names which are not marked as being a MIME name should not be used in
# Internet protocols for information interchange (mail, news, etc.).
#
# Note: ASCII and ANSI_X3.4-1968 are synonymous canonical names. Applications
# must understand both names and treat them as equivalent.
#
# The first argument passed to this file is the canonical host specification,
# CPU_TYPE-MANUFACTURER-OPERATING_SYSTEM
# or
# CPU_TYPE-MANUFACTURER-KERNEL-OPERATING_SYSTEM
host="$1"
os=`echo "$host" | sed -e 's/^[^-]*-[^-]*-\(.*\)$/\1/'`
echo "# This file contains a table of character encoding aliases,"
echo "# suitable for operating system '${os}'."
echo "# It was automatically generated from config.charset."
# List of references, updated during installation:
echo "# Packages using this file: "
case "$os" in
linux* | *-gnu*)
# With glibc-2.1 or newer, we don't need any canonicalization,
# because glibc has iconv and both glibc and libiconv support all
# GNU canonical names directly. Therefore, the Makefile does not
# need to install the alias file at all.
# The following applies only to glibc-2.0.x and older libcs.
echo "ISO_646.IRV:1983 ASCII"
;;
aix*)
echo "ISO8859-1 ISO-8859-1"
echo "ISO8859-2 ISO-8859-2"
echo "ISO8859-5 ISO-8859-5"
echo "ISO8859-6 ISO-8859-6"
echo "ISO8859-7 ISO-8859-7"
echo "ISO8859-8 ISO-8859-8"
echo "ISO8859-9 ISO-8859-9"
echo "ISO8859-15 ISO-8859-15"
echo "IBM-850 CP850"
echo "IBM-856 CP856"
echo "IBM-921 ISO-8859-13"
echo "IBM-922 CP922"
echo "IBM-932 CP932"
echo "IBM-943 CP943"
echo "IBM-1046 CP1046"
echo "IBM-1124 CP1124"
echo "IBM-1129 CP1129"
echo "IBM-1252 CP1252"
echo "IBM-eucCN GB2312"
echo "IBM-eucJP EUC-JP"
echo "IBM-eucKR EUC-KR"
echo "IBM-eucTW EUC-TW"
echo "big5 BIG5"
echo "GBK GBK"
echo "TIS-620 TIS-620"
echo "UTF-8 UTF-8"
;;
hpux*)
echo "iso88591 ISO-8859-1"
echo "iso88592 ISO-8859-2"
echo "iso88595 ISO-8859-5"
echo "iso88596 ISO-8859-6"
echo "iso88597 ISO-8859-7"
echo "iso88598 ISO-8859-8"
echo "iso88599 ISO-8859-9"
echo "iso885915 ISO-8859-15"
echo "roman8 HP-ROMAN8"
echo "arabic8 HP-ARABIC8"
echo "greek8 HP-GREEK8"
echo "hebrew8 HP-HEBREW8"
echo "turkish8 HP-TURKISH8"
echo "kana8 HP-KANA8"
echo "tis620 TIS-620"
echo "big5 BIG5"
echo "eucJP EUC-JP"
echo "eucKR EUC-KR"
echo "eucTW EUC-TW"
echo "hp15CN GB2312"
#echo "ccdc ?" # what is this?
echo "SJIS SHIFT_JIS"
echo "utf8 UTF-8"
;;
irix*)
echo "ISO8859-1 ISO-8859-1"
echo "ISO8859-2 ISO-8859-2"
echo "ISO8859-5 ISO-8859-5"
echo "ISO8859-7 ISO-8859-7"
echo "ISO8859-9 ISO-8859-9"
echo "eucCN GB2312"
echo "eucJP EUC-JP"
echo "eucKR EUC-KR"
echo "eucTW EUC-TW"
;;
osf*)
echo "ISO8859-1 ISO-8859-1"
echo "ISO8859-2 ISO-8859-2"
echo "ISO8859-4 ISO-8859-4"
echo "ISO8859-5 ISO-8859-5"
echo "ISO8859-7 ISO-8859-7"
echo "ISO8859-8 ISO-8859-8"
echo "ISO8859-9 ISO-8859-9"
echo "ISO8859-15 ISO-8859-15"
echo "cp850 CP850"
echo "big5 BIG5"
echo "dechanyu DEC-HANYU"
echo "dechanzi GB2312"
echo "deckanji DEC-KANJI"
echo "deckorean EUC-KR"
echo "eucJP EUC-JP"
echo "eucKR EUC-KR"
echo "eucTW EUC-TW"
echo "GBK GBK"
echo "KSC5601 CP949"
echo "sdeckanji EUC-JP"
echo "SJIS SHIFT_JIS"
echo "TACTIS TIS-620"
echo "UTF-8 UTF-8"
;;
solaris*)
echo "646 ASCII"
echo "ISO8859-1 ISO-8859-1"
echo "ISO8859-2 ISO-8859-2"
echo "ISO8859-4 ISO-8859-4"
echo "ISO8859-5 ISO-8859-5"
echo "ISO8859-6 ISO-8859-6"
echo "ISO8859-7 ISO-8859-7"
echo "ISO8859-8 ISO-8859-8"
echo "ISO8859-9 ISO-8859-9"
echo "ISO8859-15 ISO-8859-15"
echo "koi8-r KOI8-R"
echo "BIG5 BIG5"
echo "gb2312 GB2312"
echo "cns11643 EUC-TW"
echo "5601 EUC-KR"
echo "eucJP EUC-JP"
echo "PCK SHIFT_JIS"
echo "TIS620.2533 TIS-620"
#echo "sun_eu_greek ?" # what is this?
echo "UTF-8 UTF-8"
;;
freebsd*)
# FreeBSD 4.2 doesn't have nl_langinfo(CODESET); therefore
# localcharset.c falls back to using the full locale name
# from the environment variables.
echo "C ASCII"
echo "US-ASCII ASCII"
for l in la_LN lt_LN; do
echo "$l.ASCII ASCII"
done
for l in da_DK de_AT de_CH de_DE en_AU en_CA en_GB en_US es_ES \
fi_FI fr_BE fr_CA fr_CH fr_FR is_IS it_CH it_IT la_LN \
lt_LN nl_BE nl_NL no_NO pt_PT sv_SE; do
echo "$l.ISO_8859-1 ISO-8859-1"
echo "$l.DIS_8859-15 ISO-8859-15"
done
for l in cs_CZ hr_HR hu_HU la_LN lt_LN pl_PL sl_SI; do
echo "$l.ISO_8859-2 ISO-8859-2"
done
for l in la_LN lt_LT; do
echo "$l.ISO_8859-4 ISO-8859-4"
done
for l in ru_RU ru_SU; do
echo "$l.KOI8-R KOI8-R"
echo "$l.ISO_8859-5 ISO-8859-5"
echo "$l.CP866 CP866"
done
echo "uk_UA.KOI8-U KOI8-U"
echo "zh_TW.BIG5 BIG5"
echo "zh_TW.Big5 BIG5"
echo "zh_CN.EUC GB2312"
echo "ja_JP.EUC EUC-JP"
echo "ja_JP.SJIS SHIFT_JIS"
echo "ja_JP.Shift_JIS SHIFT_JIS"
echo "ko_KR.EUC EUC-KR"
;;
beos*)
# BeOS has a single locale, and it has UTF-8 encoding.
echo "* UTF-8"
;;
msdosdjgpp*)
# DJGPP 2.03 doesn't have nl_langinfo(CODESET); therefore
# localcharset.c falls back to using the full locale name
# from the environment variables.
echo "#"
echo "# The encodings given here may not all be correct."
echo "# If you find that the encoding given for your language and"
echo "# country is not the one your DOS machine actually uses, just"
echo "# correct it in this file, and send a mail to"
echo "# Juan Manuel Guerrero <st001906@hrz1.hrz.tu-darmstadt.de>"
echo "# and Bruno Haible <haible@clisp.cons.org>."
echo "#"
echo "C ASCII"
# ISO-8859-1 languages
echo "ca CP850"
echo "ca_ES CP850"
echo "da CP865" # not CP850 ??
echo "da_DK CP865" # not CP850 ??
echo "de CP850"
echo "de_AT CP850"
echo "de_CH CP850"
echo "de_DE CP850"
echo "en CP850"
echo "en_AU CP850" # not CP437 ??
echo "en_CA CP850"
echo "en_GB CP850"
echo "en_NZ CP437"
echo "en_US CP437"
echo "en_ZA CP850" # not CP437 ??
echo "es CP850"
echo "es_AR CP850"
echo "es_BO CP850"
echo "es_CL CP850"
echo "es_CO CP850"
echo "es_CR CP850"
echo "es_CU CP850"
echo "es_DO CP850"
echo "es_EC CP850"
echo "es_ES CP850"
echo "es_GT CP850"
echo "es_HN CP850"
echo "es_MX CP850"
echo "es_NI CP850"
echo "es_PA CP850"
echo "es_PY CP850"
echo "es_PE CP850"
echo "es_SV CP850"
echo "es_UY CP850"
echo "es_VE CP850"
echo "et CP850"
echo "et_EE CP850"
echo "eu CP850"
echo "eu_ES CP850"
echo "fi CP850"
echo "fi_FI CP850"
echo "fr CP850"
echo "fr_BE CP850"
echo "fr_CA CP850"
echo "fr_CH CP850"
echo "fr_FR CP850"
echo "ga CP850"
echo "ga_IE CP850"
echo "gd CP850"
echo "gd_GB CP850"
echo "gl CP850"
echo "gl_ES CP850"
echo "id CP850" # not CP437 ??
echo "id_ID CP850" # not CP437 ??
echo "is CP861" # not CP850 ??
echo "is_IS CP861" # not CP850 ??
echo "it CP850"
echo "it_CH CP850"
echo "it_IT CP850"
echo "lt CP775"
echo "lt_LT CP775"
echo "lv CP775"
echo "lv_LV CP775"
echo "nb CP865" # not CP850 ??
echo "nb_NO CP865" # not CP850 ??
echo "nl CP850"
echo "nl_BE CP850"
echo "nl_NL CP850"
echo "nn CP865" # not CP850 ??
echo "nn_NO CP865" # not CP850 ??
echo "no CP865" # not CP850 ??
echo "no_NO CP865" # not CP850 ??
echo "pt CP850"
echo "pt_BR CP850"
echo "pt_PT CP850"
echo "sv CP850"
echo "sv_SE CP850"
# ISO-8859-2 languages
echo "cs CP852"
echo "cs_CZ CP852"
echo "hr CP852"
echo "hr_HR CP852"
echo "hu CP852"
echo "hu_HU CP852"
echo "pl CP852"
echo "pl_PL CP852"
echo "ro CP852"
echo "ro_RO CP852"
echo "sk CP852"
echo "sk_SK CP852"
echo "sl CP852"
echo "sl_SI CP852"
echo "sq CP852"
echo "sq_AL CP852"
echo "sr CP852" # CP852 or CP866 or CP855 ??
echo "sr_YU CP852" # CP852 or CP866 or CP855 ??
# ISO-8859-3 languages
echo "mt CP850"
echo "mt_MT CP850"
# ISO-8859-5 languages
echo "be CP866"
echo "be_BE CP866"
echo "bg CP866" # not CP855 ??
echo "bg_BG CP866" # not CP855 ??
echo "mk CP866" # not CP855 ??
echo "mk_MK CP866" # not CP855 ??
echo "ru KOI8-R" # not CP866 ??
echo "ru_RU KOI8-R" # not CP866 ??
# ISO-8859-6 languages
echo "ar CP864"
echo "ar_AE CP864"
echo "ar_DZ CP864"
echo "ar_EG CP864"
echo "ar_IQ CP864"
echo "ar_IR CP864"
echo "ar_JO CP864"
echo "ar_KW CP864"
echo "ar_MA CP864"
echo "ar_OM CP864"
echo "ar_QA CP864"
echo "ar_SA CP864"
echo "ar_SY CP864"
# ISO-8859-7 languages
echo "el CP869"
echo "el_GR CP869"
# ISO-8859-8 languages
echo "he CP862"
echo "he_IL CP862"
# ISO-8859-9 languages
echo "tr CP857"
echo "tr_TR CP857"
# Japanese
echo "ja CP932"
echo "ja_JP CP932"
# Chinese
echo "zh_CN GBK"
echo "zh_TW CP950" # not CP938 ??
# Korean
echo "kr CP949" # not CP934 ??
echo "kr_KR CP949" # not CP934 ??
# Thai
echo "th CP874"
echo "th_TH CP874"
# Other
echo "eo CP850"
echo "eo_EO CP850"
;;
esac
|