/*--------------------------------------------------------------------
* Symbols referenced in this file:
* - pg_enc2name_tbl
*--------------------------------------------------------------------
*/
/*-------------------------------------------------------------------------
*
* encnames.c
* Encoding names and routines for working with them.
*
* Portions Copyright (c) 2001-2023, PostgreSQL Global Development Group
*
* IDENTIFICATION
* src/common/encnames.c
*
*-------------------------------------------------------------------------
*/
#include "c.h"
#include <ctype.h>
#include <unistd.h>
#include "mb/pg_wchar.h"
/* ----------
* All encoding names, sorted: *** A L P H A B E T I C ***
*
* All names must be without irrelevant chars, search routines use
* isalnum() chars only. It means ISO-8859-1, iso_8859-1 and Iso8859_1
* are always converted to 'iso88591'. All must be lower case.
*
* The table doesn't contain 'cs' aliases (like csISOLatin1). Are they needed?
*
* Karel Zak, Aug 2001
* ----------
*/
/*
 * One row of an encoding-name lookup table: pairs a name string with the
 * pg_enc ID it stands for.
 */
struct pg_encname
{
    const char *name;       /* encoding name */
    pg_enc      encoding;   /* encoding ID that the name maps to */
};

typedef struct pg_encname pg_encname;
/* ----------
* These are "official" encoding names.
* XXX must be sorted in the same order as enum pg_enc (in mb/pg_wchar.h)
* ----------
*/
/*
 * DEF_ENC2NAME(name, codepage) expands to the brace-enclosed initializer for
 * one table entry: the stringified name followed by the matching PG_<name>
 * constant.  The codepage argument is emitted as a third initializer only
 * when WIN32 is defined; otherwise it is discarded.
 */
#ifdef WIN32
#define DEF_ENC2NAME(enc, cp) { #enc, PG_##enc, cp }
#else
#define DEF_ENC2NAME(enc, cp) { #enc, PG_##enc }
#endif
const pg_enc2name pg_enc2name_tbl[] =
{
DEF_ENC2NAME(SQL_ASCII, 0),
DEF_ENC2NAME(EUC_JP, 20932),
DEF_ENC2NAME(EUC_CN, 20936),
DEF_ENC2NAME(EUC_KR, 51949),
DEF_ENC2NAME(EUC_TW, 0),
DEF_ENC2NAME(EUC_JIS_2004, 20932),
DEF_ENC2NAME(UTF8, 65001),
DEF_ENC2NAME(MULE_INTERNAL, 0),
DEF_ENC2NAME(LATIN1, 28591),
DEF_ENC2NAME(LATIN2, 28592),
DEF_ENC2NAME(LATIN3, 28593),
DEF_ENC2NAME(LATIN4, 28594),
DEF_ENC2NAME(LATIN5, 28599),
DEF_ENC2NAME(LATIN6, 0),
DEF_ENC2NAME(LATIN7, 0),
DEF_ENC2NAME(LATIN8, 0),
DEF_ENC2NAME(LATIN9, 28605),
DEF_ENC2NAME(LATIN10, 0),
DEF_ENC2NAME(WIN1256, 1256),
DEF_ENC2NAME(WIN1258, 1258),
DEF_ENC2NAME(WIN866, 866),
DEF_ENC2NAME(WIN874, 874),
DEF_ENC2NAME(KOI8R, 20866),
DEF_ENC2NAME(WIN1251, 1251),
DEF_ENC2NAME(WIN1252, 1252),
DEF_ENC2NAME(ISO_8859_5, 28595),
DEF_ENC2NAME(ISO_8859_6, 28596),
DEF_ENC2NAME(ISO_8859_7, 28597),
DEF_ENC2NAME(ISO_8859_8, 28598),
DEF_ENC2NAME(WIN1250, 1250),
DEF_ENC2NAME(WIN1253, 1253),
DEF_ENC2NAME(WIN1254, 1254),
DEF_ENC2NAME(WIN1255, 1255),
DEF_ENC2NAME(WIN1257, 1257),
DEF_ENC2NAME(KOI8U, 21866),
DEF_ENC2NAME(SJIS, 932),
DEF_ENC2NAME(BIG5, 950),
DEF_ENC2NAME(GBK, 936),
DEF_ENC2NAME(UHC, 949),
DEF_ENC2NAME(GB18030, 54936),
DEF_ENC2NAME(JOHAB, 0),
DEF_ENC2NAME(SHIFT_JIS_2004, 932)
};
/* ----------
* These are encoding names for gettext.
*
* This covers all encodings except MULE_INTERNAL, which is alien to gettext.
* ----------
*/
/*
* Table of encoding names for ICU (currently covers backend encodings only)
*
* Reference: <https://ssl.icu-project.org/icu-bin/convexp>
*
* NULL entries are not supported by ICU, or their mapping is unclear.
*/
StaticAssertDecl(lengthof(pg_enc2icu_tbl) == PG_ENCODING_BE_LAST + 1,
"pg_enc2icu_tbl incomplete");
/*
* Is this encoding supported by ICU?
*/
/*
* Returns ICU's name for encoding, or NULL if not supported
*/
/* ----------
* Encoding checks: return -1 on error, otherwise the encoding ID
* ----------
*/
/*
* Remove irrelevant chars from encoding name, store at *newkey
*
* (Caller's responsibility to provide a large enough buffer)
*/
/*
* Search encoding by encoding name
*
* Returns encoding ID, or -1 if not recognized
*/