1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216
|
/*
* Copyright (c) 1997,1998 Kazushi (Jam) Marukawa
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice in the documentation and/or other materials provided with
* the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
* OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
* OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
* IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/*
 * Definition of the values that specify a character set, together with
 * definitions of some well-known character sets and of the types of set.
 */
typedef unsigned short CHARSET;
/*
 * Bit layout of a CHARSET value:
 *
 *   15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0
 *  +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
 *  | r|       IRR       | m| n|         F          |
 *  +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
 *
 * r:   set when this is not the first byte of a multibyte character.
 * IRR: identification of revisions of registered character sets, held
 *      as a value from 00/01 to 03/15; 00/00 means "no IRR".  In
 *      ISO 2022 the same IRR is coded as a byte from 04/00 to 07/14.
 * m:   set when this is one byte of a multibyte character.
 * n:   set for a 96 or 96x96 graphic set, clear for a 94 or 94x94
 *      graphic set.
 * F:   final byte selecting one of the graphic sets, held as a value
 *      from 00/00 to 04/14.  The real final byte is coded from 03/00
 *      to 07/14.
 */

/* r: head byte / rest byte flag. */
#define REST_MASK 0x8000
#define CSISHEAD(cs) (((cs) & REST_MASK) == 0)
#define CSISREST(cs) ((cs) & REST_MASK)

/* IRR: conversion between a CHARSET and the IRR value it carries. */
#define IRR_SHIFT 9
#define IRR_MASK 0x7e00
#define CS2IRR(cs) (((cs) & IRR_MASK) >> IRR_SHIFT)
#define IRR2CS(irr) (IRR_MASK & ((irr) << IRR_SHIFT))

/* Conversion between an IRR value and its coded form in ISO 2022. */
#define CODE_MASK 0x003f
#define CODE_DIFF 0x0040
#define IRR2CODE(irr) (CODE_DIFF + (((irr) - 1) & CODE_MASK))
#define CODE2IRR(code) (1 + (((code) - CODE_DIFF) & CODE_MASK))

/* m & n: the four types of graphic set. */
#define TYPE_MASK 0x0180
#define TYPE_94_CHARSET 0x0000
#define TYPE_96_CHARSET 0x0080
#define TYPE_94N_CHARSET 0x0100
#define TYPE_96N_CHARSET 0x0180
#define CS2TYPE(cs) (TYPE_MASK & (cs))
#define TYPE2CS(type) (TYPE_MASK & (type))

/* F: conversion between a CHARSET and the real final byte. */
#define FT_MASK 0x007f
#define FT_DIFF 0x0030
#define CS2FT(cs) (FT_DIFF + ((cs) & FT_MASK))
#define FT2CS(ft) (FT_MASK & ((ft) - FT_DIFF))
/*
 * Each character set is identified by its IRR, TYPE and FT fields.
 * CS2CHARSET() keeps those three fields and drops the only other bit
 * of a CHARSET value, the REST_MASK flag.
 */
#define CHARSET_MASK (IRR_MASK | TYPE_MASK | FT_MASK)
#define CS2CHARSET(cs) ((cs) & CHARSET_MASK)
/*
 * The final byte 07/14 ('~') is a reserved empty set in every type of
 * charset.  So we cannot use (CS2CHARSET(cs) == WRONGCS) to check for
 * it, because WRONGCS names the empty set of one type only; compare the
 * final byte alone instead.
 */
#define CSISWRONG(cs) (CS2FT(cs) == '~')
/*
 * List of representative character sets.
 *
 * Each value combines one of the TYPE_* constants with FT2CS() of the
 * final byte of the set.  JISX0208_90KANJI additionally carries an IRR
 * of 1.
 */
#define ASCII (TYPE_94_CHARSET | FT2CS('B'))
#define WRONGCS (TYPE_94_CHARSET | FT2CS('~'))
#if ISO
/* 94-character sets (TYPE_94_CHARSET). */
#define JISX0201KANA (TYPE_94_CHARSET | FT2CS('I'))
#define JISX0201ROMAN (TYPE_94_CHARSET | FT2CS('J'))
/* 96-character sets (TYPE_96_CHARSET). */
#define LATIN1 (TYPE_96_CHARSET | FT2CS('A'))
#define LATIN2 (TYPE_96_CHARSET | FT2CS('B'))
#define LATIN3 (TYPE_96_CHARSET | FT2CS('C'))
#define LATIN4 (TYPE_96_CHARSET | FT2CS('D'))
#define GREEK (TYPE_96_CHARSET | FT2CS('F'))
#define ARABIC (TYPE_96_CHARSET | FT2CS('G'))
#define HEBREW (TYPE_96_CHARSET | FT2CS('H'))
#define CYRILLIC (TYPE_96_CHARSET | FT2CS('L'))
#define LATIN5 (TYPE_96_CHARSET | FT2CS('M'))
/* Multibyte 94x94 sets (TYPE_94N_CHARSET). */
#define JISX0208_78KANJI (TYPE_94N_CHARSET | FT2CS('@'))
#define GB2312 (TYPE_94N_CHARSET | FT2CS('A'))
#define JISX0208KANJI (TYPE_94N_CHARSET | FT2CS('B'))
#define JISX0208_90KANJI (IRR2CS(1) | TYPE_94N_CHARSET | FT2CS('B'))
#define KSC5601 (TYPE_94N_CHARSET | FT2CS('C'))
#define JISX0212KANJISUP (TYPE_94N_CHARSET | FT2CS('D'))
#if JAPANESE
/*
 * Special values for the Japanese code sets.  Only input_set uses these
 * in addition to the definitions above.  Their F field is 07/15
 * (FT_MASK, every bit set), which is not a valid value for F, so it is
 * used to mark these special character sets.  The IRR field tells the
 * two apart.
 */
#define SJIS (IRR2CS(1) | TYPE_94N_CHARSET | FT_MASK)
#define UJIS (IRR2CS(2) | TYPE_94N_CHARSET | FT_MASK)
#endif /* JAPANESE */
#endif /* ISO */
/*
 * List of special characters and the character set used with them.
 *
 * The terminator of a string with character sets is represented by
 * the pair (NULCH, NULLCS).  A padding character in such a string is
 * represented by the pair (PADCH, NULLCS).  The binary data bytes
 * '\0' and '\1' are represented by ('\0', WRONGCS) and ('\1', WRONGCS)
 * respectively.
 *
 * NULLCS is the same value as ASCII.
 */
#define NULCH ('\0')
#define PADCH ('\1')
#define NULLCS (ASCII)
/*
 * Macros for easy checking.
 *
 * Both compare only the fields kept by CS2CHARSET(), so the REST_MASK
 * flag of cs is ignored.  Because NULLCS is defined as ASCII, the two
 * macros test the same condition; they differ only in the intent they
 * express at the call site.
 */
#define CSISASCII(cs) (CS2CHARSET(cs) == ASCII)
#define CSISNULLCS(cs) (CS2CHARSET(cs) == NULLCS)
/*
* Definition of values to specify the character set and character.
*/
typedef int CHARVAL;
/*
 * A CHARVAL packs one character together with its character set: the
 * character occupies the low 8 * sizeof(char) bits and the character
 * set the bits above them.
 *
 * MAKECV(ch, cs)  builds a CHARVAL from character ch and charset cs.
 *                 ch is parenthesized so that any expression can be
 *                 passed (e.g. a conditional expression), and it is
 *                 masked to the character field so that a negative
 *                 (signed) char cannot overwrite the charset bits
 *                 through sign extension.
 * CV2CH(cv)       extracts the character from a CHARVAL.
 * CV2CS(cv)       extracts the character set from a CHARVAL.
 */
#define MAKECV(ch, cs) (((cs) << (8 * sizeof(char))) | ((ch) & ((1 << (8 * sizeof(char))) - 1)))
#define CV2CH(cv) ((cv) & ((1 << (8 * sizeof(char))) - 1))
#define CV2CS(cv) ((cv) >> (8 * sizeof(char)))
/*
 * Definition of code sets.  A code set is not a character set: it is
 * only a means of encoding, and we use these values when we decide
 * what the input data is.
 */
typedef enum {
	/*
	 * Code sets for the left plane, the right plane and the output.
	 */
	/* A code set which needs no conversion. */
	noconv,

	/*
	 * Code sets for the left plane and the output.
	 */
	/* JIS, a subset of iso2022. */
	jis,
	/* A code set extended by iso2022. */
	iso7,

	/*
	 * Code sets for the right plane only.
	 */
	/* No code set. */
	none,
	/* Both UJIS and SJIS. */
	japanese,

	/*
	 * Code sets for the right plane and the output.
	 */
	/* The Japanese code set named UJIS. */
	ujis,
	/* The Japanese code set named SJIS. */
	sjis,
	/* A code set extended by iso2022. */
	iso8
} CODESET;
/*
 * struct multibuf is the internal data structure of multi.c.
 * Only its name is declared here.
 */
typedef struct multibuf MULBUF;
/*
* in multi.c
*/
/*
 * These are declared without parameter lists, so the compiler does not
 * check the arguments at call sites against these declarations.
 */
extern int set_planeset();
extern void init_def_codesets();
extern void init_def_priority();
extern void init_priority();
extern CODESET get_priority();
extern void set_priority();
extern MULBUF *new_multi();
extern void clear_multi();
extern void init_multi();
extern void buffering_multi();
extern void parsing_multi();
extern void set_codesets();
extern int get_bufbytes();
extern void set_bufbytes();
extern char *get_icharset_string();
extern char *outchar();
extern char *outbuf();
extern int mwidth();
extern char *rotate_right_codeset();
extern int strlen_cs();
extern int chlen_cs();
extern char *strdup_cs();
/*
* in unify.c
*/
/*
 * These are declared without parameter lists, so the compiler does not
 * check the arguments at call sites against these declarations.
 */
extern void chconvert_cs();
extern void chunify_cs();
extern int chcmp_cs();
|