File: multi.h

package info (click to toggle)
jless 332iso242-2
  • links: PTS
  • area: main
  • in suites: potato, slink
  • size: 1,152 kB
  • ctags: 1,384
  • sloc: ansic: 16,157; sh: 203; makefile: 110; awk: 7
file content (216 lines) | stat: -rw-r--r-- 7,348 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
/*
 * Copyright (c) 1997,1998  Kazushi (Jam) Marukawa
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice in the documentation and/or other materials provided with 
 *    the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT 
 * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR 
 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE 
 * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN 
 * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */


/*
 * CHARSET: a value that identifies a character set.
 * Definitions of some well-known character sets and of the set types
 * follow below.
 */
typedef unsigned short int CHARSET;

/*
 * Bit layout of a CHARSET value:
 *
 *   151413121110 9 8 7 6 5 4 3 2 1 0
 *   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 *   |r|    IRR    |m|n|      F      |
 *   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 *
 * r:   set when this is not the first byte of a multi-byte character.
 * IRR: identification of revisions of registered character sets (IRR),
 *      stored as 00/01 to 03/15; 00/00 means "no IRR".  The real IRR
 *      is coded from 04/00 to 07/14.
 * m:   set when this is one byte of a multi-byte character.
 * n:   set for a 96 or 96x96 graphic set; clear for a 94 or 94x94
 *      graphic set.
 * F:   final byte, which selects one of the graphic sets, stored as
 *      00/00 to 04/14.  The real final byte is coded from 03/00 to
 *      07/14.
 */

/* r: flag carried by every byte of a multi-byte character but the first. */
#define REST_MASK		0x8000
/* 1 when the r flag of cs is clear (cs is a head byte), otherwise 0. */
#define CSISHEAD(cs)		(((cs) & REST_MASK) == 0)
/* Non-zero (the REST_MASK bit itself) when the r flag of cs is set. */
#define CSISREST(cs)		(REST_MASK & (cs))

/* IRR: six-bit revision field held in bits 14..9 of a CHARSET. */
#define IRR_SHIFT		9
#define IRR_MASK		0x7e00
/* Extract the IRR number from a CHARSET value. */
#define CS2IRR(cs)		((IRR_MASK & (cs)) >> IRR_SHIFT)
/* Move an IRR number into its field; bits outside the field are dropped. */
#define IRR2CS(irr)		(IRR_MASK & ((irr) << IRR_SHIFT))

/* Conversion between an IRR number and its coded form in ISO 2022. */
#define CODE_DIFF		0x0040
#define CODE_MASK		0x003f
/* IRR number (1 based) -> coded byte. */
#define IRR2CODE(irr)		(CODE_DIFF + (CODE_MASK & ((irr) - 1)))
/* Coded byte -> IRR number (1 based). */
#define CODE2IRR(code)		(1 + (CODE_MASK & ((code) - CODE_DIFF)))

/* m and n: the type of the set (94 or 96, single byte or multi byte). */
#define TYPE_MASK		0x0180
#define TYPE_94_CHARSET		0x0000
#define TYPE_96_CHARSET		0x0080
#define TYPE_94N_CHARSET	0x0100
#define TYPE_96N_CHARSET	0x0180
/* Keep only the type bits of a CHARSET value / of a type value. */
#define CS2TYPE(cs)		(TYPE_MASK & (cs))
#define TYPE2CS(type)		(TYPE_MASK & (type))

/* F: final byte, stored relative to 03/00. */
#define FT_DIFF			0x0030
#define FT_MASK			0x007f
/* CHARSET value -> real final byte. */
#define CS2FT(cs)		(FT_DIFF + (FT_MASK & (cs)))
/* Real final byte -> F field of a CHARSET value. */
#define FT2CS(ft)		(FT_MASK & ((ft) - FT_DIFF))

/*
 * A character set is identified by its IRR, TYPE and FT fields taken
 * together.
 */
#define CHARSET_MASK		(FT_MASK | TYPE_MASK | IRR_MASK)
#define CS2CHARSET(cs)		(CHARSET_MASK & (cs))

/*
 * Every type of charset reserves final byte 07/14 for an empty set.
 * Comparing CS2CHARSET(cs) with WRONGCS is therefore not enough; the
 * final byte alone is tested instead.
 */
#define CSISWRONG(cs)		('~' == CS2FT(cs))

/*
 * Representative character sets.
 */
/* Defined regardless of ISO. */
#define ASCII			(FT2CS('B') | TYPE_94_CHARSET)
#define WRONGCS			(FT2CS('~') | TYPE_94_CHARSET)
#if ISO
/* Sets of type TYPE_94_CHARSET. */
#define JISX0201KANA		(FT2CS('I') | TYPE_94_CHARSET)
#define JISX0201ROMAN		(FT2CS('J') | TYPE_94_CHARSET)
/* Sets of type TYPE_96_CHARSET. */
#define LATIN1			(FT2CS('A') | TYPE_96_CHARSET)
#define LATIN2			(FT2CS('B') | TYPE_96_CHARSET)
#define LATIN3			(FT2CS('C') | TYPE_96_CHARSET)
#define LATIN4			(FT2CS('D') | TYPE_96_CHARSET)
#define GREEK			(FT2CS('F') | TYPE_96_CHARSET)
#define ARABIC			(FT2CS('G') | TYPE_96_CHARSET)
#define HEBREW			(FT2CS('H') | TYPE_96_CHARSET)
#define CYRILLIC		(FT2CS('L') | TYPE_96_CHARSET)
#define LATIN5			(FT2CS('M') | TYPE_96_CHARSET)
/* Sets of type TYPE_94N_CHARSET. */
#define JISX0208_78KANJI	(FT2CS('@') | TYPE_94N_CHARSET)
#define GB2312			(FT2CS('A') | TYPE_94N_CHARSET)
#define JISX0208KANJI		(FT2CS('B') | TYPE_94N_CHARSET)
#define JISX0208_90KANJI	(FT2CS('B') | TYPE_94N_CHARSET | IRR2CS(1))
#define KSC5601			(FT2CS('C') | TYPE_94N_CHARSET)
#define JISX0212KANJISUP	(FT2CS('D') | TYPE_94N_CHARSET)
#if JAPANESE
/*
 * Special values for the Japanese code sets.  Only input_set uses
 * these, together with the definitions above.  07/15 is not a valid F,
 * so an all-ones F field is used to mark the special character sets.
 */
#define SJIS			(FT_MASK | TYPE_94N_CHARSET | IRR2CS(1))
#define UJIS			(FT_MASK | TYPE_94N_CHARSET | IRR2CS(2))
#endif /* JAPANESE */
#endif /* ISO */

/*
 * Special characters and the character set paired with each of them.
 *
 *   - The terminator of a string with character sets is NULCH paired
 *     with NULLCS.
 *   - A padding character inside such a string is PADCH paired with
 *     NULLCS.
 *   - The binary data bytes '\0' and '\1' are the byte itself paired
 *     with WRONGCS.
 */
#define NULCH			('\0')
#define PADCH			('\1')
#define NULLCS			(ASCII)

/*
 * Convenience tests on the character-set part of cs.
 */
#define CSISASCII(cs)		(ASCII == CS2CHARSET(cs))
#define CSISNULLCS(cs)		(NULLCS == CS2CHARSET(cs))


/*
 * CHARVAL: a character and its character set packed into one integer.
 * The character occupies the low 8 * sizeof(char) bits and the CHARSET
 * value sits directly above it.
 */
typedef int CHARVAL;

/*
 * MAKECV(ch, cs): pack character ch and character set cs into a CHARVAL.
 *   Both arguments are parenthesized so that any expression (for
 *   example a conditional expression) may be passed.  ch is reduced to
 *   the same low bits that CV2CH() reads back, so a negative
 *   (sign-extended) char cannot overwrite the character-set bits.
 * CV2CH(cv): the character part of cv.
 * CV2CS(cv): the character-set part of cv.
 */
#define MAKECV(ch, cs)		(((cs) << 8 * sizeof(char)) | \
				 ((ch) & ((1 << 8 * sizeof(char)) - 1)))
#define CV2CH(cv)		((cv) & ((1 << 8 * sizeof(char)) - 1))
#define CV2CS(cv)		((cv) >> 8 * sizeof(char))


/*
 * Code sets.  A code set is not a character set: it only names a way
 * of coding, and these values are used when deciding what the input
 * data are.
 */
typedef enum {
	/* usable for the left, right and output planes */
	noconv = 0,	/* code set that needs no conversion */

	/* usable for the left and output planes */
	jis = 1,	/* JIS, a subset of iso2022 */
	iso7 = 2,	/* code set extended by iso2022 */

	/* usable for the right plane only */
	none = 3,	/* no code set */
	japanese = 4,	/* both UJIS and SJIS */

	/* usable for the right and output planes */
	ujis = 5,	/* Japanese code set named UJIS */
	sjis = 6,	/* Japanese code set named SJIS */
	iso8 = 7	/* code set extended by iso2022 */
} CODESET;


/*
 * MULBUF names struct multibuf, the internal data structure of multi.c.
 * Only the name is introduced here; the structure's contents are not
 * defined in this header.
 */
struct multibuf;
typedef struct multibuf MULBUF;


/*
 * Functions defined in multi.c.
 *
 * NOTE(review): every declaration in this header is written without a
 * parameter list, so the compiler does not check call arguments
 * against it.
 */
extern int set_planeset();
extern void init_def_codesets();
extern void init_def_priority();
extern void init_priority();
extern CODESET get_priority();
extern void set_priority();
extern MULBUF *new_multi();
extern void clear_multi();
extern void init_multi();
extern void buffering_multi();
extern void parsing_multi();
extern void set_codesets();
extern int get_bufbytes();
extern void set_bufbytes();
extern char *get_icharset_string();
extern char *outchar();
extern char *outbuf();
extern int mwidth();
extern char *rotate_right_codeset();
extern int strlen_cs();
extern int chlen_cs();
extern char *strdup_cs();

/*
 * Functions defined in unify.c.
 */
extern void chconvert_cs();
extern void chunify_cs();
extern int chcmp_cs();