1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190
|
#ifndef unicode_h
#define unicode_h
/*
** Copyright 2000-2001 Double Precision, Inc.
** See COPYING for distribution information.
**
** $Id: unicode.h,v 1.17 2004/05/23 14:28:25 mrsam Exp $
*/
#ifdef __cplusplus
extern "C" {
#endif
#include "../unicode/unicode_config.h" /* VPATH build */
#include <stdlib.h>
#include <stdio.h>
#if HAVE_WCHAR_H
#include <wchar.h>
#endif
#if HAVE_STDDEF_H
#include <stddef.h>
#endif
/* A single unicode character; this header aliases it to wchar_t. */
typedef wchar_t unicode_char;

/*
** Bit values stored in unicode_info.flags
*/
#define UNICODE_UTF		1	/* Direct UTF mapping */
#define UNICODE_MB		2	/* Multibyte characters present */
#define UNICODE_SISO		4	/*
					** Composite mapping, using shift
					** in/out (verbatim text comparison
					** may not work, must convert to UTF,
					** or something).
					** (replaces search_chset).
					*/
#define UNICODE_USASCII		8	/* Charset is a US-ASCII superset */
#define UNICODE_REPLACEABLE	16	/*
					** Conversion errors can be replaced
					** by adequate placeholders
					** (replacement characters).
					*/
#define UNICODE_HEADER_QUOPRI	32	/*
					** Quoted-printable (Q) encoding is
					** preferred for MIME message headers.
					*/
#define UNICODE_HEADER_BASE64	64	/*
					** Base64 (B) encoding is preferred
					** for MIME message headers.
					*/
#define UNICODE_BODY_QUOPRI	128	/*
					** Quoted-printable (Q) encoding is
					** preferred for MIME message body.
					*/
#define UNICODE_BODY_BASE64	256	/*
					** Base64 (B) encoding is preferred
					** for MIME message body.
					*/

/*
** Descriptor of one character set: its name, its property flags, and the
** conversion routines that go with it.  Every routine receives the
** descriptor itself as its first argument.
**
** NOTE(review): the trailing int * argument of each routine is not
** documented in this header; presumably an error indicator -- confirm
** against the implementations.
*/
struct unicode_info {
	const char *chset;	/* Official character set */
	int flags;		/* Flags: UNICODE_* bits, or'ed together */

	/* Character string in this charset => unicode */
	unicode_char *(*c2u)(const struct unicode_info *,
			     const char *, int *);

	/* Unicode => character string in this charset */
	char *(*u2c)(const struct unicode_info *,
		     const unicode_char *, int *);

	/*
	** Upper/lower/titlecase conversion of a string that is in this
	** character set.
	*/
	char *(*toupper_func)(const struct unicode_info *,
			      const char *, int *);
	char *(*tolower_func)(const struct unicode_info *,
			      const char *, int *);
	char *(*totitle_func)(const struct unicode_info *,
			      const char *, int *);

	/* See the UNICODE_SISO description, which mentions this member */
	const struct unicode_info *search_chset;
};
/*
** Character set descriptors that are declared directly by this header.
** These are declarations only; the objects are defined in another file.
*/
extern const struct unicode_info unicode_ISO8859_1;
extern const struct unicode_info unicode_UTF8;
extern const struct unicode_info unicode_IMAP_MODUTF7;
/*
** ISO8859 charsets all share the same functions.
**
** NOTE(review): the last pointer argument of these routines is not
** documented in this header; presumably per-charset conversion data --
** confirm against the implementations.
*/
extern unicode_char *unicode_iso8859_c2u(const char *,
					 int *,
					 const unicode_char *);
extern char *unicode_iso8859_u2c(const unicode_char *,
				 int *,
				 const unicode_char *);
extern char *unicode_iso8859_convert(const char *,
				     int *,
				     const char *);

/* Only the unicode-to-charset direction is declared here for windows-874 */
extern char *unicode_windows874_u2c(const unicode_char *,
				    int *,
				    const unicode_char *);

/* IBM864 charset has some funkiness, so it gets its own pair of functions */
extern unicode_char *unicode_ibm864_c2u(const char *,
					int *,
					const unicode_char *);
extern char *unicode_ibm864_u2c(const unicode_char *,
				int *,
				const unicode_char *);
/*
** One entry of the character set table: a character set name together
** with a pointer to the descriptor of that character set.
*/
struct unicode_chsetlist {
	const char *chsetname;			/* Character set name */
	const struct unicode_info *ptr;		/* Its descriptor */
};

/*
** The character set table itself.  Its size is not given here.
** NOTE(review): how the end of the table is marked is not visible in this
** header -- confirm against the definition before iterating over it.
*/
extern const struct unicode_chsetlist unicode_chsetlist[];

/*
** Returns a character set name (presumably the name of the default
** character set, going by the function's name -- confirm).
**
** Declared with (void) so that this is a real prototype in C: an empty
** parameter list would leave the arguments unchecked by the compiler.
*/
extern const char *unicode_default_chset(void);

/*
** Takes a string and returns a character set descriptor (presumably a
** lookup by character set name -- confirm, including what is returned
** when nothing matches).
*/
extern const struct unicode_info *unicode_find(const char *);
/*
** UTF8 functions
*/

/* Unicode string => UTF-8, and UTF-8 => unicode string */
extern char *unicode_toutf8(const unicode_char *);
extern unicode_char *unicode_fromutf8(const char *);

/*
** Case conversion of one unicode character:
** _uc is uppercase, _lc is lowercase, _tc is titlecase.
*/
extern unicode_char unicode_uc(unicode_char);
extern unicode_char unicode_lc(unicode_char);
extern unicode_char unicode_tc(unicode_char);

/*
** Text in the given character set => UTF-8, and UTF-8 => text in the
** given character set.
*/
extern char *unicode_ctoutf8(const struct unicode_info *,
			     const char *,
			     int *);
extern char *unicode_cfromutf8(const struct unicode_info *,
			       const char *,
			       int *);

/* Internal functions: */
extern unicode_char *unicode_utf8_tou(const char *, int *);
extern char *unicode_utf8_fromu(const unicode_char *, int *);
extern size_t unicode_utf8_fromu_pass(const unicode_char *, char *);

/*
** NOTE(review): presumably the largest number of bytes that one character
** takes up in UTF-8 -- confirm against the users of this constant.
*/
#define UNICODE_UTF8_MAXLEN	6
/*
** Convert txt from the 'from' character set to the 'to' character set.
** errno=EINVAL if conversion could not be performed.
*/
extern char *unicode_convert(const char *txt,
	const struct unicode_info *from,
	const struct unicode_info *to);

/*
** Like unicode_convert(), except unconvertible chars are replaced
** by periods (or something similar), instead of aborting with EINVAL.
*/
extern char *unicode_xconvert(const char *txt,
	const struct unicode_info *from,
	const struct unicode_info *to);

/*
** Like unicode_convert(), except that we search for a character set
** from a list of chsets we support ('from' is a string here, not a
** descriptor).
** errno=EINVAL if 'to' character set does not exist.
**
** NOTE(review): 'to' is already a descriptor pointer in this prototype, so
** the EINVAL remark may really be about 'from' -- confirm against the
** implementation.
*/
extern char *unicode_convert_fromchset(const char *txt,
	const char *from,
	const struct unicode_info *to);
/*
** Conversion between unicode and the modified-UTF7 encoding that is used
** for IMAP folder names.
*/

/*
** Modified-UTF7 => unicode.
** err < 0 if out of memory, else ptr to first illegal modutf7-char.
** This can be used to test if string is a valid mod-utf7 string.
*/
extern unicode_char *unicode_modutf7touc(const char *s, int *err);

/*
** Unicode => modified-UTF7.
** NOTE(review): the second argument of the "x" variant is not documented
** in this header -- confirm its meaning against the implementation.
*/
extern char *unicode_uctomodutf7(const unicode_char *);
extern char *unicode_uctomodutf7x(const unicode_char *,
				  const unicode_char *);
#ifdef __cplusplus
}
#endif
#endif
|