1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152
|
#include "wc.h"
#include "wtf.h"
#include "iso2022.h"
#include "hz.h"
#ifdef USE_UNICODE
#include "ucs.h"
#include "utf8.h"
#include "utf7.h"
#endif
/*
 * Globally visible replacement strings: WcReplace is "?" and WcReplaceW
 * is "??".  They are not used in this part of the file.
 * NOTE(review): presumably these stand in for characters that cannot be
 * converted, with the "W" variant for double-width characters -- confirm
 * against the code that reads them.
 */
char *WcReplace = "?";
char *WcReplaceW = "??";
/* Forward declaration: wc_Str_conv() calls this before its definition. */
static Str wc_conv_to_ces(Str is, wc_ces ces);
/*
 * Convert `is` from the encoding scheme `f_ces` to `t_ces`.
 *
 * WC_CES_WTF is the pivot on both sides:
 *   - unless f_ces is WC_CES_WTF, the input is first run through the
 *     conv_from handler registered for f_ces in WcCesInfo;
 *   - unless t_ces is WC_CES_WTF, that result is then handed to
 *     wc_conv_to_ces().
 * When both are WC_CES_WTF, `is` itself is returned.
 */
Str
wc_Str_conv(Str is, wc_ces f_ces, wc_ces t_ces)
{
    Str pivot = is;

    if (f_ces != WC_CES_WTF)
        pivot = WcCesInfo[WC_CES_INDEX(f_ces)].conv_from(is, f_ces);

    if (t_ces == WC_CES_WTF)
        return pivot;

    return wc_conv_to_ces(pivot, t_ces);
}
/*
 * Run wc_Str_conv() with a temporary set of options: strict_iso2022 and
 * no_replace are forced to WC_TRUE and fix_width_conv to WC_FALSE.
 *
 * The whole global WcOption structure is copied before the overrides and
 * assigned back after the conversion, so the caller's options are
 * unchanged when this function returns.
 */
Str
wc_Str_conv_strict(Str is, wc_ces f_ces, wc_ces t_ces)
{
    wc_option saved = WcOption;
    Str converted;

    WcOption.strict_iso2022 = WC_TRUE;
    WcOption.no_replace = WC_TRUE;
    WcOption.fix_width_conv = WC_FALSE;

    converted = wc_Str_conv(is, f_ces, t_ces);

    WcOption = saved;
    return converted;
}
/*
 * Encode `is` into the character encoding scheme `ces`.
 *
 * The bytes of `is` are decoded with wtf_parse(), so the input is
 * expected to be in the WTF form (wc_Str_conv() only calls this after
 * its conv_from step, or when the source is already WC_CES_WTF).
 *
 * Returns `is` itself when every byte can be kept verbatim for `ces`;
 * otherwise returns a newly created Str holding the encoded text.
 */
static Str
wc_conv_to_ces(Str is, wc_ces ces)
{
    wc_uchar *const head = (wc_uchar *)is->ptr;
    wc_uchar *const tail = head + is->length;
    wc_uchar *cur = head;
    wc_status st;
    Str os;
    int parse_all;

    /*
     * Skip the leading run of bytes that need no conversion.  What counts
     * as "plain" depends on the target: HZ also stops at '~', the three
     * Vietnamese encodings also stop at bytes below 0x20, everything else
     * stops only at bytes >= 0x80.
     */
    switch (ces) {
    case WC_CES_HZ_GB_2312:
        while (cur < tail && *cur != '~' && *cur < 0x80)
            cur++;
        break;
    case WC_CES_TCVN_5712:
    case WC_CES_VISCII_11:
    case WC_CES_VPS:
        while (cur < tail && 0x20 <= *cur && *cur < 0x80)
            cur++;
        break;
    default:
        while (cur < tail && *cur < 0x80)
            cur++;
        break;
    }

    /* Nothing to convert: hand the input back untouched. */
    if (cur == tail)
        return is;

    os = Strnew_size(is->length);
    if (cur > head) {
        cur--;                  /* for precompose */
        /* Copy the plain prefix, minus the byte just given back. */
        if (cur > head)
            Strcat_charp_n(os, is->ptr, (int)(cur - head));
    }
    wc_output_init(ces, &st);

    /*
     * For the encodings listed here every character goes through
     * wtf_parse() and the push_to handler; the others may copy plain
     * bytes directly (see the loop below).
     */
    switch (ces) {
    case WC_CES_ISO_2022_JP:
    case WC_CES_ISO_2022_JP_2:
    case WC_CES_ISO_2022_JP_3:
    case WC_CES_ISO_2022_CN:
    case WC_CES_ISO_2022_KR:
    case WC_CES_HZ_GB_2312:
    case WC_CES_TCVN_5712:
    case WC_CES_VISCII_11:
    case WC_CES_VPS:
#ifdef USE_UNICODE
    case WC_CES_UTF_8:
    case WC_CES_UTF_7:
#endif
        parse_all = 1;
        break;
    default:
        parse_all = 0;
        break;
    }

    while (cur < tail) {
        /*
         * Direct copy: a byte below 0x80 whose successor has a non-zero
         * wtf_width().  Anything else is decoded and pushed through the
         * encoder.
         */
        if (!parse_all && *cur < 0x80 && wtf_width(cur + 1)) {
            Strcat_char(os, (char)*cur);
            cur++;
        } else {
            st.ces_info->push_to(os, wtf_parse(&cur), &st);
        }
    }

    wc_push_end(os, &st);
    return os;
}
/*
 * Convert `is` to `t_ces`, choosing the source encoding from *f_ces,
 * `hint` and the WcOption.auto_detect setting.
 *
 *   - If *f_ces or hint is WC_CES_WTF, *f_ces is set to WC_CES_WTF and the
 *     input is treated as WTF.
 *   - If auto detection is WC_OPT_DETECT_OFF, `hint` is used as the source
 *     and stored in *f_ces.
 *   - Otherwise wc_auto_detect() is run (hinted with *f_ces when that has
 *     the WC_CES_T_8BIT bit, else with `hint`) and its result is used as
 *     the source.  *f_ces is updated to the detected value only under the
 *     conditions spelled out below.
 *
 * Returns the result of wc_Str_conv() with the chosen source encoding.
 */
Str
wc_Str_conv_with_detect(Str is, wc_ces *f_ces, wc_ces hint, wc_ces t_ces)
{
    wc_ces detect;
    int current_is_8bit;
    int adopt;

    if (*f_ces == WC_CES_WTF || hint == WC_CES_WTF) {
        *f_ces = WC_CES_WTF;
        return wc_Str_conv(is, WC_CES_WTF, t_ces);
    }

    if (WcOption.auto_detect == WC_OPT_DETECT_OFF) {
        *f_ces = hint;
        return wc_Str_conv(is, hint, t_ces);
    }

    /* An 8-bit current encoding takes precedence over the caller's hint. */
    if (*f_ces & WC_CES_T_8BIT)
        hint = *f_ces;
    detect = wc_auto_detect(is->ptr, is->length, hint);

    current_is_8bit = (*f_ces & WC_CES_T_8BIT) != 0;
    if (WcOption.auto_detect == WC_OPT_DETECT_ON) {
        /* Adopt an 8-bit result always; a NASCII one unless currently 8-bit. */
        adopt = (detect & WC_CES_T_8BIT)
            || ((detect & WC_CES_T_NASCII) && !current_is_8bit);
    } else {
        /* Other modes: adopt only an ISO 2022 result, unless currently 8-bit. */
        adopt = (detect & WC_CES_T_ISO_2022) && !current_is_8bit;
    }
    if (adopt)
        *f_ces = detect;

    return wc_Str_conv(is, detect, t_ces);
}
/*
 * Call the end-of-output handler that matches the encoding recorded in
 * st->ces_info->id, passing it `os` and `st`.
 *
 * Any id with the WC_CES_T_ISO_2022 bit goes to the ISO 2022 handler;
 * HZ, and (when built with USE_UNICODE) UTF-8 and UTF-7, have their own.
 * For every other encoding nothing is done.
 */
void
wc_push_end(Str os, wc_status *st)
{
    /* The ISO 2022 family is a bit-mask test, so it is checked first. */
    if (st->ces_info->id & WC_CES_T_ISO_2022) {
        wc_push_to_iso2022_end(os, st);
        return;
    }

    switch (st->ces_info->id) {
    case WC_CES_HZ_GB_2312:
        wc_push_to_hz_end(os, st);
        break;
#ifdef USE_UNICODE
    case WC_CES_UTF_8:
        wc_push_to_utf8_end(os, st);
        break;
    case WC_CES_UTF_7:
        wc_push_to_utf7_end(os, st);
        break;
#endif
    default:
        break;
    }
}
|