1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100
|
/*
* Unicode Conversion Library (UTF-16 to EUC-JP)
* 1997-2002 by yoshidam
*
*/
#ifdef USE_EUC
#include <string.h>
#include <stdlib.h>
#include "uconv.h"
#include "u2e.h"
#include "ustring.h"
#include "ruby.h"
/* Fallback string accessors, defined only when the headers included above
 * do not already provide RSTRING_PTR (presumably older Ruby versions --
 * TODO confirm which ones). */
#ifndef RSTRING_PTR
# define RSTRING_PTR(s) (RSTRING(s)->ptr)
# define RSTRING_LEN(s) (RSTRING(s)->len)
#endif
/* Byte appended to the output for a character that cannot be converted
 * when no unknown_u_conv callback is supplied. */
#define REPLACEMENT_CHAR '?'
/*
 * Convert a UTF-16 byte sequence (low byte first) to EUC-JP.
 *
 *   u              input bytes; each code unit is read as u[i] | u[i+1] << 8
 *   len            number of bytes in u; a trailing odd byte is ignored
 *   e              output string; UStr_alloc() is called on it before use
 *   unknown_u_conv optional callback invoked with the code point of a
 *                  character that has no mapping; it must return a String,
 *                  which is appended to the output.  When NULL,
 *                  REPLACEMENT_CHAR is appended instead.
 *   u2e_hook       optional callback consulted before the built-in table.
 *                  Qnil   -> fall through to the table lookup
 *                  ""     -> treat the character as unknown
 *                  String -> append its bytes as the conversion result
 *
 * If either callback returns a non-nil, non-String value, e is released
 * with UStr_free() and that value is raised via rb_exc_raise().
 *
 * Returns e->len after the conversion.
 */
int
u2e_conv2(const unsigned char* u, int len, UString* e,
          unknown_unicode unknown_u_conv,
          unknown_unicode u2e_hook)
{
    int i;

    UStr_alloc(e);

    for (i = 0; i < len - 1; i += 2) {
        VALUE ev;
        int converted = 0;
        unsigned long uchar = u[i] | (u[i+1] << 8);

        /* Combine a surrogate pair into one supplementary code point.
         * The pair is consumed only when the second unit really is a low
         * surrogate (0xdc00..0xdfff); otherwise the high surrogate is
         * handled on its own and the next unit is converted normally. */
        if (uchar >= 0xd800 && uchar < 0xdc00 &&   /* high surrogate */
            i < len - 3) {
            unsigned long low = u[i+2] | (u[i+3] << 8);
            if (low >= 0xdc00 && low < 0xe000) {   /* low surrogate */
                uchar = (((uchar & 0x3ff) << 10) | (low & 0x3ff)) + 0x10000;
                i += 2;
            }
        }

        if (u2e_hook &&
            (ev = u2e_hook(uchar)) != Qnil) {
            if (TYPE(ev) != T_STRING) {
                UStr_free(e);
                rb_exc_raise(ev);
            }
            /* An empty string from the hook means "unknown character". */
            if (RSTRING_LEN(ev) != 0) {
                UStr_addChars(e, (unsigned char*)(RSTRING_PTR(ev)),
                              RSTRING_LEN(ev));
                converted = 1;
            }
        }
        else {
            unsigned short echar = 0;

            /* Code points beyond the table have no mapping. */
            if (uchar < sizeof(u2e_tbl)/sizeof(unsigned short))
                echar = u2e_tbl[uchar];

            converted = 1;
            if (echar > 0 && echar < 128) {                 /* ASCII */
                UStr_addChar(e, echar);
            }
            else if (echar > 0xa0 && echar <= 0xdf) {       /* JIS X 0201 kana */
                UStr_addChar2(e, 0x8e, echar & 0xff);
            }
            else if (echar >= 0x2121 && echar <= 0x6d63) {  /* JIS X 0212 */
                UStr_addChar3(e, 0x8f,
                              (echar >> 8) | 0x80, (echar & 0xff) | 0x80);
            }
            else if (echar >= 0xa0a0 && echar != 0xffff) {  /* JIS X 0208 */
                UStr_addChar2(e, echar >> 8, echar & 0xff);
            }
            else {                                          /* Unknown char */
                converted = 0;
            }
        }

        /* Single fallback path for characters nothing above could map. */
        if (!converted) {
            if (unknown_u_conv != NULL) {
                VALUE ret = unknown_u_conv(uchar);
                if (TYPE(ret) != T_STRING) {
                    UStr_free(e);
                    rb_exc_raise(ret);
                }
                UStr_addChars(e, (unsigned char*)(RSTRING_PTR(ret)),
                              RSTRING_LEN(ret));
            }
            else {
                UStr_addChar(e, REPLACEMENT_CHAR);
            }
        }
    }

    return e->len;
}
#endif /* USE_EUC */
|