# Aliases for character sets, derived from the list distributed with ICU

# The format of this file is as follows:
# Each line contains a converter, followed by a list of aliases. The preferred
# names for display purposes are marked with a leading asterisk (*). If none of
# the aliases is marked as display name, the name of the converter is used.
# Note that names will be normalized, so for example the names IBM-1201 and
# ibm1201 will be considered equal (see the libtranscript documentation for
# details on normalization).

# Converter names must start in column 1. Lines can be continued by indenting
# the second and later lines with any amount of white-space.

# Some converters need special handling. For this purpose, there are several
# special tags that may be used in the list of aliases. These are:
# :disable        Disables this converter. Note that this also prohibits any
#                 other converter with any of the names specified.
# :probe_load     The converter must be loaded and the probe routine of the
#                 converter must be called to establish its presence. Normal
#                 operation merely checks the presence of the .ltc file.

# Unicode encodings:
# ==================

# All UTF-X variants have multiple different IBM names for different versions
# of the Unicode standard and with and without IBM Private Use Area (PUA).
# FIXME: The current grouping doesn't distinguish between different Unicode
# versions, even though the IBM names do. That doesn't seem right, because if
# you really specify a version, you would expect the output to be compatible
# with that version. So an error should be reported if you use codepoints that
# are not available in that version.

# The UTF-16 and UTF-32 encodings will write a BOM and produce output in the
# platform byte order. The UTF-16 and UTF-32 encodings will switch endianness
# when encountering a BOM on input. By default they use the big-endian encoding.

UTF-8 IBM-1208 IBM-1209 IBM-5304 IBM-5305 IBM-13496 IBM-13497
	IBM-17592 ibm-17593 Windows-65001 cp1208
# Use UCS-2 as an alias for UTF-16, although it is actually UTF-16BE. However,
# because people simply use UCS-2 and UTF-16 interchangeably, it is better to
# do it this way.
UTF-16 ISO-10646-UCS-2 IBM-1204 IBM-1205 unicode csUnicode ucs-2
UTF-16BE x-utf-16be UnicodeBigUnmarked IBM-1200 IBM-1201
	IBM-13488 IBM-13489 ibm-17584 IBM-17585 IBM-21680 IBM-21681 IBM-25776
	IBM-25777 IBM-29872 ibm-29873 IBM-61955 IBM-61956 Windows-1201 cp1200
	cp1201 UTF16_BigEndian
UTF-16LE x-utf-16le UnicodeLittleUnmarked IBM-1202 IBM-1203
	IBM-13490 ibm-13491 IBM-17586 IBM-17587 IBM-21682 IBM-21683 IBM-25778
	IBM-25779 ibm-29874 IBM-29875 UTF16_LittleEndian Windows-1200
UTF-32 ISO-10646-UCS-4 IBM-1236 IBM-1237 csUCS4 ucs-4
UTF-32BE UTF32_BigEndian IBM-1232 IBM-1233 IBM-9424
UTF-32LE UTF32_LittleEndian IBM-1234 IBM-1235

# These special names force a particular endianness on output, but are
# otherwise equivalent to the UTF-16 and UTF-32 converters.
*x-UTF-16BE-BOM *UnicodeBig
*x-UTF-16LE-BOM *UnicodeLittle
x-UTF-32BE-BOM
x-UTF-32LE-BOM

# To allow reading from/writing to UTF-8 files which happen to have a BOM
# encoded as their first character:
x-UTF-8-BOM

UTF-7 Windows-65000

CESU-8 IBM-9400

GB18030 :probe_load IBM-1392 Windows-54936

# Non-unicode encodings:
# ======================

*ISO-8859-1 IBM-819 cp819 *Latin1 8859_1 csISOLatin1 iso-ir-100
	ISO_8859-1:1987 l1 819
*ASCII *US-ASCII ANSI_X3.4-1968 ANSI_X3.4-1986 ISO_646.irv:1991
	iso_646.irv:1983 ISO646-US us csASCII iso-ir-6 cp367 ascii7 646
	windows-20127 IBM-367

iso-8859_2-1999 ibm-912_P100-1995 IBM-912 *ISO-8859-2 ISO_8859-2:1987 *Latin2
	csISOLatin2 iso-ir-101 l2 8859_2 cp912 912 Windows-28592
iso-8859_3-1999 ibm-913_P100-2000 IBM-913 *ISO-8859-3 ISO_8859-3:1988 *Latin3
	csISOLatin3 iso-ir-109 l3 8859_3 cp913 913 Windows-28593
iso-8859_4-1998 ibm-914_P100-1995 IBM-914 *ISO-8859-4 *Latin4 csISOLatin4
	iso-ir-110 ISO_8859-4:1988 l4 8859_4 cp914 914 Windows-28594
iso-8859_5-1999 ibm-915_P100-1995 IBM-915 *ISO-8859-5 *Cyrillic
	csISOLatinCyrillic iso-ir-144 ISO_8859-5:1988 8859_5 cp915 915
	Windows-28595

iso-8859_6-1999 ibm-1089_P100-1995 IBM-1089 *ISO-8859-6 arabic csISOLatinArabic
	iso-ir-127 ISO_8859-6:1987 ECMA-114 ASMO-708 8859_6 cp1089
	1089 Windows-28596 ISO-8859-6-I ISO-8859-6-E

iso-8859_7-2003 ibm-9005_X110-2007 IBM-9005 *ISO-8859-7 greek greek8 ELOT_928
	ECMA-118 csISOLatinGreek iso-ir-126 Windows-28597 sun_eu_greek
	ISO_8859-7:2003 8859_7

# Same as IBM-9005, but without the euro update.
iso-8859_7-1987 ibm-813_P100-1995 *IBM-813 ISO_8859-7:1987 cp813 813

iso-8859_8-1999 ibm-5012_P100-1999 IBM-5012 *ISO-8859-8 hebrew csISOLatinHebrew
	iso-ir-138 ISO_8859-8:1988 ISO-8859-8-I ISO-8859-8-E 8859_8 Windows-28598
	hebrew8

iso-8859_9-1999 ibm-920_P100-1995 IBM-920 *ISO-8859-9 *Latin5 csISOLatin5
	iso-ir-148 ISO_8859-9:1989 l5 8859_9 cp920 920 Windows-28599 ECMA-128
	turkish8 turkish
iso-8859_10-1998 *ISO-8859-10 iso-ir-157 l6 ISO_8859-10:1992 csISOLatin6
	*Latin6
iso-8859_11-2001 *ISO-8859-11 thai8

iso-8859_13-1998 ibm-921_P100-1995 IBM-921 *ISO-8859-13 8859_13 Windows-28603
	cp921 921
iso-8859_14-1998 *ISO-8859-14 iso-ir-199 *Latin8 iso-celtic l8 # ISO_8859-14:1998
iso-8859_15-1999 ibm-923_P100-1998 IBM-923 *ISO-8859-15 *Latin9 l9 8859_15
	latin0 csisolatin0 csisolatin9 iso8859_15_fdis cp923 923 Windows-28605

# The Shift-JIS naming is a mess. There are at least 10 different encodings all
# claiming to be Shift-JIS.

Shift_JIS sjis pck x-sjis
Shift_JIS-2004
Shift_JISX0213

EUC-JP ibm-33722_P12A_P12A-2004_U2
	Extended_UNIX_Code_Packed_Format_for_Japanese csEUCPkdFmtJapanese
	X-EUC-JP Windows-51932 IBM-33722_VPUA IBM-eucJP
EUC-JIS-2004
EUC-JISX0213

# GBK
windows-936-2000 *GBK CP936 MS936 *Windows-936

# Technically, GB2312 doesn't include ASCII, which EUC-CN does. However, it is
# often used as an alias for EUC-CN, so we do that as well.
*EUC-CN *GB2312

# EUC-KR and similar. Java KSC names are ignored in favor of IANA ones.
EUC-KR ibm-970_P110_P110-2006_U2 IBM-970 Windows-51949 csEUCKR IBM-eucKR
	cp970 970 IBM-970_VPUA

# Windows EUC-KR variant. KSC names ignored in favor of IANA ones. CP949 is
# normally this one
cp949 windows-949-2000 *Windows-949 ms949

EUC-TW :probe_load ibm-964_P110-1999 IBM-964 IBM-eucTW cns11643 cp964 964 IBM-964_VPUA
EUC-TW-2004

ibm-437_P100-1995 *IBM-437 cp437 437 csPC8CodePage437 Windows-437

ibm-878_P100-1996 IBM-878 *KOI8-R koi8 csKOI8R Windows-20866 cp878
ibm-1168_P100-2002 IBM-1168 *KOI8-U Windows-21866

# Windows 125*
ibm-5346_P100-1998 IBM-5346 *Windows-1250 cp1250
ibm-5347_P100-1998 IBM-5347 *Windows-1251 cp1251 ANSI1251
ibm-5348_P100-1997 IBM-5348 *Windows-1252 cp1252
ibm-5349_P100-1998 IBM-5349 *Windows-1253 cp1253
ibm-5350_P100-1998 IBM-5350 *Windows-1254 cp1254
ibm-9447_P100-2002 IBM-9447 *Windows-1255 cp1255
ibm-9448_X100-2005 IBM-9448 *Windows-1256 cp1256
ibm-9449_P100-2002 IBM-9449 *Windows-1257 cp1257
ibm-5354_P100-1998 IBM-5354 *Windows-1258 cp1258

*ISO-2022-JP :probe_load csISO2022JP JIS_Encoding csJISEncoding JIS JIS7 JIS8
*ISO-2022-JP-1 :probe_load IBM-5054
*ISO-2022-JP-2 :probe_load csISO2022JP2
ISO-2022-JP-3 :probe_load
ISO-2022-JP-2004 :probe_load
*ISO-2022-KR :probe_load csISO2022KR
*ISO-2022-CN :probe_load csISO2022CN
ISO-2022-CN-EXT :probe_load

ibm-37_P100-1995 *IBM-37 ebcdic-cp-us ebcdic-cp-ca
	ebcdic-cp-wt ebcdic-cp-nl csIBM037 cp037 037 cpibm37
ibm-1047_P100-1995 *IBM-1047 cp1047 1047
*KOI8-RU ibm-1167_P100-2002 *IBM-1167
jis-x-0201-1969 ibm-897_P100-1995 *IBM-897 *JIS-X-0201 X0201 csHalfWidthKatakana
iso-8859_16-2001 *ISO-8859-16 iso-ir-226 *Latin10 l10 # ISO_8859-16:2001

viscii *VISCII iso-ir-180

# Big5 variants
*Big5 x-big5 csbig5
*Windows-950
big5-hkscs-1999 *Big5-HKSCS ibm-1375_P100-2007 IBM-1375 big5hk HKSCS-BIG5
Big5-HKSCS-2001
Big5-HKSCS-2004
Big5-HKSCS-2008