/*
 * HZ encoding support: wc_conv_from_hz() decodes an HZ byte string,
 * wc_push_to_hz() / wc_push_to_hz_end() produce HZ output.
 */
#include "wc.h"
#include "iso2022.h"
#include "hz.h"
#include "wtf.h"
#ifdef USE_UNICODE
#include "ucs.h"
#endif
/*
 * Decode an HZ-encoded string into the internal representation built by
 * wtf_push() / wtf_push_unknown().
 *
 * is  - input string (is->ptr, is->length).
 * ces - not used by this function.
 *
 * Returns `is` itself when the input contains no tilde and no byte >= 0x80
 * (nothing to convert); otherwise returns a newly created Str.
 *
 * Decoder states:
 *   WC_HZ_NOSTATE   - single-byte mode.
 *   WC_HZ_TILDA     - a tilde was seen in single-byte mode.
 *   WC_HZ_MBYTE     - two-byte mode, waiting for the first byte of a pair.
 *   WC_HZ_MBYTE1    - two-byte mode, first byte seen, waiting for the second.
 *   WC_HZ_TILDA_MB  - a tilde was seen in two-byte mode.
 *   WC_HZ_MBYTE1_GR - first byte of an 8-bit (GR) pair seen in single-byte
 *                     mode, waiting for the second.
 */
Str
wc_conv_from_hz(Str is, wc_ces ces)
{
    Str os;
    wc_uchar *sp = (wc_uchar *)is->ptr;
    wc_uchar *ep = sp + is->length;
    wc_uchar *p;
    int state = WC_HZ_NOSTATE;

    /* Skip the leading run of plain 7-bit bytes that need no conversion. */
    for (p = sp; p < ep && *p < 0x80 && *p != WC_C_HZ_TILDA; p++)
        ;
    if (p == ep)
        return is;              /* whole input is plain: hand it back as is */

    os = Strnew_size(is->length);
    if (p > sp)
        Strcat_charp_n(os, is->ptr, (int)(p - sp));

    for (; p < ep; p++) {
        switch (state) {
        case WC_HZ_NOSTATE:
            if (*p == WC_C_HZ_TILDA)
                state = WC_HZ_TILDA;
            else if (WC_ISO_MAP[*p] == WC_ISO_MAP_GR)
                state = WC_HZ_MBYTE1_GR;        /* GB 2312 ? */
            else if (*p & 0x80)
                wtf_push_unknown(os, p, 1);
            else
                Strcat_char(os, (char)*p);
            break;
        case WC_HZ_TILDA:
            if (*p == WC_C_HZ_SI) {
                /* Escape that switches to two-byte mode. */
                state = WC_HZ_MBYTE;
                break;
            } else if (*p == WC_C_HZ_TILDA)
                Strcat_char(os, (char)*p);      /* doubled tilde: one literal tilde */
            else if (*p != '\n')
                wtf_push_unknown(os, p-1, 2);   /* unrecognised escape */
            /*
             * Tilde + newline is the HZ line continuation (RFC 1843): it is
             * consumed without output.  The escape is finished in every case
             * that reaches this point, so the decoder must return to
             * single-byte mode; otherwise the next byte would be misread as
             * the second byte of an escape.
             */
            state = WC_HZ_NOSTATE;
            break;
        case WC_HZ_TILDA_MB:
            if (*p == WC_C_HZ_SO || *p == '\n') {
                /* Leave two-byte mode; nothing is output. */
                state = WC_HZ_NOSTATE;
                break;
            }
            else if (WC_ISO_MAP[*p & 0x7f] == WC_ISO_MAP_GL)
                /* The tilde was the first byte of an ordinary pair. */
                wtf_push(os, WC_CCS_GB_2312, ((wc_uint32)*(p-1) << 8) | *p);
            else
                wtf_push_unknown(os, p-1, 2);
            state = WC_HZ_MBYTE;
            break;
        case WC_HZ_MBYTE:
            if (*p == WC_C_HZ_TILDA)
                state = WC_HZ_TILDA_MB;
            else if (WC_ISO_MAP[*p & 0x7f] == WC_ISO_MAP_GL)
                state = WC_HZ_MBYTE1;
            else
                wtf_push_unknown(os, p, 1);
            break;
        case WC_HZ_MBYTE1:
            if (WC_ISO_MAP[*p & 0x7f] == WC_ISO_MAP_GL)
                wtf_push(os, WC_CCS_GB_2312, ((wc_uint32)*(p-1) << 8) | *p);
            else
                wtf_push_unknown(os, p-1, 2);
            state = WC_HZ_MBYTE;
            break;
        case WC_HZ_MBYTE1_GR:
            if (WC_ISO_MAP[*p] == WC_ISO_MAP_GR)
                wtf_push(os, WC_CCS_GB_2312, ((wc_uint32)*(p-1) << 8) | *p);
            else
                wtf_push_unknown(os, p-1, 2);
            state = WC_HZ_NOSTATE;
            break;
        }
    }

    /* Input ended in the middle of an escape or a pair: flag the last byte. */
    switch (state) {
    case WC_HZ_TILDA:
    case WC_HZ_TILDA_MB:
    case WC_HZ_MBYTE1:
    case WC_HZ_MBYTE1_GR:
        wtf_push_unknown(os, p-1, 1);
        break;
    }
    return os;
}
/*
 * Append one character `cc` to `os` in HZ form.
 *
 * st->gl records the current output mode: non-zero while the output is in
 * two-byte mode, zero otherwise.  Mode-switch sequences (WC_C_HZ_TILDA
 * followed by WC_C_HZ_SI or WC_C_HZ_SO) are emitted only when the mode
 * actually changes.
 *
 *   - WC_CCS_GB_2312: both code bytes are written with the high bit cleared.
 *   - WC_CCS_US_ASCII: the byte is written as is; a tilde is doubled.
 *   - WC_CCS_UNKNOWN / WC_CCS_UNKNOWN_W: WC_REPLACE / WC_REPLACE_W is written
 *     unless WcOption.no_replace is set, in which case nothing is written.
 *   - anything else is first converted (wc_any_to_any_ces(), when built with
 *     USE_UNICODE and WcOption.ucs_conv is set) or downgraded to one of the
 *     unknown sets, and then processed again.
 */
void
wc_push_to_hz(Str os, wc_wchar_t cc, wc_status *st)
{
    for (;;) {
        if (cc.ccs == WC_CCS_GB_2312) {
            /* Switch to two-byte mode first if we are not in it yet. */
            if (!st->gl) {
                Strcat_char(os, WC_C_HZ_TILDA);
                Strcat_char(os, WC_C_HZ_SI);
                st->gl = 1;
            }
            Strcat_char(os, (char)((cc.code >> 8) & 0x7f));
            Strcat_char(os, (char)(cc.code & 0x7f));
            return;
        }

        if (cc.ccs == WC_CCS_US_ASCII ||
            cc.ccs == WC_CCS_UNKNOWN_W ||
            cc.ccs == WC_CCS_UNKNOWN) {
            int is_ascii = (cc.ccs == WC_CCS_US_ASCII);

            /* Unknown characters are dropped silently when requested. */
            if (!is_ascii && WcOption.no_replace)
                return;

            /* Everything below is single-byte output: leave two-byte mode. */
            if (st->gl) {
                Strcat_char(os, WC_C_HZ_TILDA);
                Strcat_char(os, WC_C_HZ_SO);
                st->gl = 0;
            }

            if (is_ascii) {
                char ch = (char)cc.code;

                /* A literal tilde is written twice. */
                if (ch == WC_C_HZ_TILDA)
                    Strcat_char(os, WC_C_HZ_TILDA);
                Strcat_char(os, ch);
            } else if (cc.ccs == WC_CCS_UNKNOWN_W) {
                Strcat_charp(os, WC_REPLACE_W);
            } else {
                Strcat_charp(os, WC_REPLACE);
            }
            return;
        }

        /* Any other character set: map it to something handled above. */
#ifdef USE_UNICODE
        if (WcOption.ucs_conv) {
            cc = wc_any_to_any_ces(cc, st);
            continue;
        }
#endif
        if (WC_CCS_IS_WIDE(cc.ccs))
            cc.ccs = WC_CCS_UNKNOWN_W;
        else
            cc.ccs = WC_CCS_UNKNOWN;
    }
}
/*
 * Finish an HZ output stream: if st->gl says the output is still in
 * two-byte mode, append WC_C_HZ_TILDA and WC_C_HZ_SO to `os` and clear
 * st->gl.  Does nothing when st->gl is already zero.
 */
void
wc_push_to_hz_end(Str os, wc_status *st)
{
    if (!st->gl)
        return;

    Strcat_char(os, WC_C_HZ_TILDA);
    Strcat_char(os, WC_C_HZ_SO);
    st->gl = 0;
}
|