1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130
|
/*
* Unicode Conversion Library (UTF-16 to Shift_JIS)
* 1999-2004 by yoshidam
*
*/
#ifdef USE_SJIS
#include <string.h>
#include <stdlib.h>
#include "uconv.h"
#ifdef USE_WIN32API
# include <windows.h>
# define SJIS_CODEPAGE 932
#else
# include "u2s.h"
#endif
#include "ustring.h"
#include "ruby.h"
#ifndef RSTRING_PTR
# define RSTRING_PTR(s) (RSTRING(s)->ptr)
# define RSTRING_LEN(s) (RSTRING(s)->len)
#endif
#define REPLACEMENT_CHAR '?'
/*
 * Append the fallback output for a code point that could not be converted.
 *
 * If unknown_u_conv is non-NULL it is called with the code point; a
 * T_STRING result is appended to s, any other result is raised as a Ruby
 * exception after s has been freed.  If unknown_u_conv is NULL,
 * REPLACEMENT_CHAR is appended instead.
 */
static void
u2s_append_unknown(UString* s, unsigned long uchar,
                   unknown_unicode unknown_u_conv)
{
    if (unknown_u_conv != NULL) {
        VALUE ret = unknown_u_conv(uchar);
        if (TYPE(ret) != T_STRING) {
            /* release the output buffer before the non-local exit */
            UStr_free(s);
            rb_exc_raise(ret);
        }
        UStr_addChars(s, (unsigned char*)(RSTRING_PTR(ret)), RSTRING_LEN(ret));
    }
    else {
        UStr_addChar(s, REPLACEMENT_CHAR);
    }
}

/*
 * Convert a little-endian UTF-16 byte sequence to Shift_JIS.
 *
 *   u              input bytes, read two at a time as low byte, high byte
 *   len            number of bytes in u; a trailing odd byte is ignored
 *   s              output string; initialised here with UStr_alloc()
 *   unknown_u_conv optional callback for code points with no mapping;
 *                  when NULL, REPLACEMENT_CHAR is emitted instead
 *   u2s_hook       optional callback tried first for every code point;
 *                  returning Qnil falls through to the built-in
 *                  conversion, returning an empty string requests the
 *                  unknown-character handling
 *
 * Returns the resulting length of s.  If a callback returns a non-nil,
 * non-string value, s is freed and that value is raised via rb_exc_raise().
 */
int
u2s_conv2(const unsigned char* u, int len, UString* s,
          unknown_unicode unknown_u_conv,
          unknown_unicode u2s_hook)
{
    int i;

    UStr_alloc(s);

    for (i = 0; i < len - 1; i += 2) {
        VALUE sv;
        unsigned long uchar = u[i] | (u[i+1] << 8);

        if (uchar >= 0xd800 && uchar < 0xdc00 && /* high surrogate */
            i < len - 3) {
            unsigned long low = u[i+2] | (u[i+3] << 8);
            /*
             * Only combine when the next unit really is a low surrogate
             * (0xdc00..0xdfff); otherwise the high surrogate is handled
             * on its own and the next unit is processed normally.
             */
            if (low >= 0xdc00 && low < 0xe000) { /* low surrogate */
                uchar = (((uchar & 1023) << 10) | (low & 1023)) + 0x10000;
                i += 2; /* consume the low surrogate as well */
            }
        }

        if (u2s_hook &&
            (sv = u2s_hook(uchar)) != Qnil) {
            if (TYPE(sv) != T_STRING) {
                UStr_free(s);
                rb_exc_raise(sv);
            }
            if (RSTRING_LEN(sv) == 0) {
                /* empty string from the hook: treat as unknown character */
                u2s_append_unknown(s, uchar, unknown_u_conv);
            }
            UStr_addChars(s, (unsigned char*)(RSTRING_PTR(sv)), RSTRING_LEN(sv));
        }
        else {
#ifdef USE_WIN32API
            unsigned char str[3];
            /* "\xff" is the default char used to detect unmapped input */
            int slen = WideCharToMultiByte(SJIS_CODEPAGE, 0,
                                           (LPCWSTR)(u + i), 1, str, sizeof(str),
                                           "\xff", NULL);
            if (slen == 1 && str[0] == (unsigned char)'\xff') { /* Unknown char */
                /*
                 * Pass the decoded code point, not the raw unit at u[i]:
                 * after a surrogate pair, i points at the low surrogate.
                 */
                u2s_append_unknown(s, uchar, unknown_u_conv);
            }
            else if (slen == 1) {
                UStr_addChar(s, str[0]);
            }
            else if (slen == 2) {
                UStr_addChar2(s, str[0], str[1]);
            }
#else
            unsigned short schar = 0;
            /* code points beyond the table keep schar == 0 (unknown) */
            if (uchar < sizeof(u2s_tbl)/sizeof(unsigned short))
                schar = u2s_tbl[uchar];
            if (schar > 0 && schar < 128) { /* ASCII */
                UStr_addChar(s, schar);
            }
            else if (schar > 0xa0 && schar <= 0xdf) { /* JIS X 0201 kana */
                UStr_addChar(s, schar);
            }
            else if (schar >= 0x8140 && schar != 0xffff) { /* JIS X 0208 */
                UStr_addChar2(s, schar >> 8, schar & 0xff);
            }
#endif /* USE_WIN32API */
            /* final branch shared by both conversion back ends above */
            else {
                u2s_append_unknown(s, uchar, unknown_u_conv);
            }
        }
    }

    return s->len;
}
#endif /* USE_SJIS */
|