File: unicode.h

package info (click to toggle)
cone 0.75-1
  • links: PTS, VCS
  • area: main
  • in suites: lenny
  • size: 31,040 kB
  • ctags: 13,930
  • sloc: ansic: 90,648; cpp: 79,781; sh: 18,355; perl: 3,218; makefile: 1,611; yacc: 289; sed: 16
file content (190 lines) | stat: -rw-r--r-- 5,246 bytes parent folder | download | duplicates (7)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
#ifndef	unicode_h
#define	unicode_h

/*
** Copyright 2000-2001 Double Precision, Inc.
** See COPYING for distribution information.
**
** $Id: unicode.h,v 1.17 2004/05/23 14:28:25 mrsam Exp $
*/

#ifdef	__cplusplus
extern "C" {
#endif

#include	"../unicode/unicode_config.h" /* VPATH build */

#include	<stdlib.h>

#include	<stdio.h>
#if HAVE_WCHAR_H
#include	<wchar.h>
#endif

#if HAVE_STDDEF_H
#include	<stddef.h>
#endif

/*
** Character type used for decoded (unicode) text in this library.
*/
typedef wchar_t unicode_char;

/*
** Bit values stored in the 'flags' member of struct unicode_info.
*/

/* Direct UTF mapping */
#define UNICODE_UTF		1

/* Multibyte characters present */
#define UNICODE_MB		2

/*
** Composite mapping, using shift in/out.  Verbatim text comparison may
** not work for such a character set; convert to UTF (or similar) first.
** This flag replaces search_chset.
*/
#define UNICODE_SISO		4

/* Character set is a US-ASCII superset */
#define UNICODE_USASCII		8

/*
** Conversion errors can be replaced by adequate placeholders
** (replacement characters).
*/
#define UNICODE_REPLACEABLE	16

/* Quoted-printable (Q) encoding is preferred for MIME message headers */
#define UNICODE_HEADER_QUOPRI	32

/* Base64 (B) encoding is preferred for MIME message headers */
#define UNICODE_HEADER_BASE64	64

/* Quoted-printable (Q) encoding is preferred for the MIME message body */
#define UNICODE_BODY_QUOPRI	128

/* Base64 (B) encoding is preferred for the MIME message body */
#define UNICODE_BODY_BASE64	256

/*
** Descriptor for one character set: its official name, a set of
** UNICODE_* flag bits, and the conversion routines that implement it.
**
** Every routine receives the descriptor itself as its first argument.
** NOTE(review): the meaning of the trailing int * argument is not
** documented in this header (presumably error reporting) -- confirm
** against the implementations.
*/
struct unicode_info {
	/* Official character set */
	const char *chset;

	/* Combination of the UNICODE_* bits defined above */
	int flags;

	/* Convert character string in this charset to unicode */
	unicode_char *(*c2u)(const struct unicode_info *,
			     const char *,
			     int *);

	/* Convert unicode to character string in this charset */
	char *(*u2c)(const struct unicode_info *,
		     const unicode_char *,
		     int *);

	/* Convert the string in this character set to uppercase */
	char *(*toupper_func)(const struct unicode_info *,
			      const char *,
			      int *);

	/* Convert the string in this character set to lowercase */
	char *(*tolower_func)(const struct unicode_info *,
			      const char *,
			      int *);

	/* Convert the string in this character set to titlecase */
	char *(*totitle_func)(const struct unicode_info *,
			      const char *,
			      int *);

	/*
	** Another charset descriptor associated with this one.  Its exact
	** role is not documented here; see the UNICODE_SISO note above.
	*/
	const struct unicode_info *search_chset;
};

/*
** Character set descriptors that are referenced directly by name.
** They are only declared here; the definitions live elsewhere.
** (Going by the identifiers: ISO-8859-1, UTF-8, and the modified UTF-7
** encoding used by IMAP -- confirm against the definitions.)
*/
extern const struct unicode_info unicode_ISO8859_1;
extern const struct unicode_info unicode_UTF8;
extern const struct unicode_info unicode_IMAP_MODUTF7;

/*
** Table-driven converters shared by several single-byte character sets.
**
** NOTE(review): the argument roles are not spelled out in this header.
** Presumably the first argument is the text to convert, the int * is
** used for error reporting, and the last argument is the per-charset
** mapping table -- confirm against the implementations.
*/

extern char *
unicode_iso8859_u2c(const unicode_char *, int *, const unicode_char *);

extern char *
unicode_windows874_u2c(const unicode_char *, int *, const unicode_char *);

/* ISO8859 charsets all share the same functions */

extern unicode_char *
unicode_iso8859_c2u(const char *, int *, const unicode_char *);

extern char *
unicode_iso8859_convert(const char *, int *, const char *);

/* IBM864 charset has some funkiness, so it gets its own pair */

extern unicode_char *
unicode_ibm864_c2u(const char *, int *, const unicode_char *);

extern char *
unicode_ibm864_u2c(const unicode_char *, int *, const unicode_char *);


/*
** One entry of the character set table: a character set name paired
** with a pointer to the descriptor for that character set.
*/
struct unicode_chsetlist {
	const char *chsetname;			/* Character set name */
	const struct unicode_info *ptr;		/* Matching descriptor */
	} ;

/*
** Table of unicode_chsetlist entries, defined elsewhere.
** NOTE(review): how the end of the table is marked is not visible in
** this header -- check the definition before iterating over it.
*/
extern const struct unicode_chsetlist unicode_chsetlist[];

/*
** Returns a character set name string.  (Judging by the identifier this
** is the default character set -- confirm against the implementation.)
**
** Declared with an explicit (void) so that C compilers treat this as a
** real prototype and reject calls that pass arguments; an empty
** parameter list in C leaves the arguments unchecked.
*/
extern const char *unicode_default_chset(void);

/*
** Takes a string and returns a pointer to a character set descriptor.
** (Presumably looks the descriptor up by character set name; the
** result for an unknown name is not documented here -- confirm.)
*/
extern const struct unicode_info *unicode_find(const char *);

/*
** UTF8 functions
*/

/* Convert Unicode to UTF-8, and UTF-8 back to Unicode */

extern char         *unicode_toutf8(const unicode_char *);
extern unicode_char *unicode_fromutf8(const char *);

/* Unicode case conversion of a single character: upper, lower, title */

extern unicode_char  unicode_uc(unicode_char);
extern unicode_char  unicode_lc(unicode_char);
extern unicode_char  unicode_tc(unicode_char);

/*
** Convert text between the given character set and UTF-8.
** (The int * argument is not documented in this header; presumably
** error reporting -- confirm against the implementation.)
*/

extern char *
unicode_ctoutf8(const struct unicode_info *, const char *, int *);

extern char *
unicode_cfromutf8(const struct unicode_info *, const char *, int *);

/* Internal functions: */

extern unicode_char *unicode_utf8_tou(const char *, int *);
extern char         *unicode_utf8_fromu(const unicode_char *, int *);

extern size_t unicode_utf8_fromu_pass(const unicode_char *, char *);

/*
** NOTE(review): presumably the largest number of bytes one character
** can occupy in UTF-8 as produced by this library -- confirm.
*/
#define UNICODE_UTF8_MAXLEN	6

/*
** Convert 'txt' from character set 'from' to character set 'to'.
** errno=EINVAL if the conversion could not be performed.
*/
extern char *
unicode_convert(const char *txt,
		const struct unicode_info *from,
		const struct unicode_info *to);

/*
** Like unicode_convert(), except that characters which cannot be
** converted are replaced by periods (or something similar), instead of
** aborting with EINVAL.
*/
extern char *
unicode_xconvert(const char *txt,
		 const struct unicode_info *from,
		 const struct unicode_info *to);

/*
** Like unicode_convert(), except that the source character set is given
** by name in 'from', and is searched for in the list of character sets
** we support.
** errno=EINVAL if 'to' character set does not exist.
** NOTE(review): only 'from' is passed by name here, so the line above
** may really mean the 'from' character set -- confirm against the
** implementation.
*/
extern char *
unicode_convert_fromchset(const char *txt,
			  const char *from,
			  const struct unicode_info *to);

	/*
	** Convert between unicode and modified-UTF7 encoding used for
	** IMAP folder names.
	*/

/*
** Decode the modified-UTF7 string 's' into unicode characters.
**
** On return, err < 0 if out of memory, else it refers to the first
** illegal modutf7 character.  This makes the function usable as a
** validity test for a mod-utf7 string.
** NOTE(review): 'err' is an int *, so the value stored is presumably a
** position within 's' rather than a pointer -- confirm.
*/
extern unicode_char *unicode_modutf7touc(const char *s, int *err);

/* Encode unicode characters as modified-UTF7. */
extern char *unicode_uctomodutf7(const unicode_char *);

/*
** Variant of unicode_uctomodutf7() taking a second unicode string.
** NOTE(review): the role of the second argument is not documented in
** this header -- confirm against the implementation.
*/
extern char *unicode_uctomodutf7x(const unicode_char *, const unicode_char *);

#ifdef	__cplusplus
}
#endif

#endif