File: u2s.c

package info (click to toggle)
ruby-uconv 0.6.1-3
  • links: PTS, VCS
  • area: main
  • in suites: bullseye, buster, sid, stretch
  • size: 4,716 kB
  • ctags: 197
  • sloc: ansic: 161,247; ruby: 44,420; makefile: 2
file content (130 lines) | stat: -rw-r--r-- 3,394 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
/*
 * Unicode Conversion Library (UTF-16 to Shift_JIS)
 * 1999-2004 by yoshidam
 *
 */

#ifdef USE_SJIS

#include <string.h>
#include <stdlib.h>
#include "uconv.h"
#ifdef USE_WIN32API
#  include <windows.h>
#  define SJIS_CODEPAGE 932
#else
#  include "u2s.h"
#endif
#include "ustring.h"
#include "ruby.h"

/*
 * Fallback string accessors, defined only when the headers included above
 * do not already provide RSTRING_PTR (presumably older Ruby headers --
 * confirm).  They read the ptr/len members of RSTRING(s) directly.
 */
#ifndef RSTRING_PTR
#  define RSTRING_PTR(s) (RSTRING(s)->ptr)
#  define RSTRING_LEN(s) (RSTRING(s)->len)
#endif

/* Byte appended to the output for a character that cannot be converted
   when the caller supplied no unknown-character callback. */
#define REPLACEMENT_CHAR '?'


/*
 * Append the substitute for a code point that has no Shift_JIS mapping.
 *
 * When unknown_u_conv is non-NULL it is called with the code point: a
 * String result is appended to s, anything else is raised as an exception
 * after UStr_free(s) has been called.  Without a callback a single
 * REPLACEMENT_CHAR is appended.
 */
static void
u2s_add_unknown(UString* s, unknown_unicode unknown_u_conv,
                unsigned long uchar)
{
  if (unknown_u_conv != NULL) {
    VALUE ret = unknown_u_conv(uchar);
    if (TYPE(ret) != T_STRING) {
      UStr_free(s);
      rb_exc_raise(ret);
    }
    UStr_addChars(s, (unsigned char*)(RSTRING_PTR(ret)), RSTRING_LEN(ret));
  }
  else {
    UStr_addChar(s, REPLACEMENT_CHAR);
  }
}

/*
 * Convert a UTF-16 byte sequence to Shift_JIS.
 *
 *   u              input bytes; each code unit is read low byte first
 *                  (little-endian)
 *   len            number of bytes in u; a trailing odd byte is ignored
 *   s              output string; UStr_alloc() is called on it first and
 *                  the converted bytes are appended to it
 *   unknown_u_conv optional callback (may be NULL) that supplies the
 *                  replacement for a character without a mapping
 *   u2s_hook       optional callback (may be NULL) consulted for every
 *                  character before the built-in conversion
 *
 * A high surrogate immediately followed by a low surrogate is combined
 * into one code point before conversion.  An unpaired surrogate is left
 * as it is and converted like any other code unit.
 *
 * u2s_hook results: Qnil falls through to the built-in conversion, a
 * non-empty String is appended verbatim, an empty String marks the
 * character as unknown, and any other object is raised as an exception.
 *
 * Returns s->len after the conversion.  When a callback returns a
 * non-String object, UStr_free(s) is called and the object is raised with
 * rb_exc_raise(), so the function does not return in that case.
 */
int
u2s_conv2(const unsigned char* u, int len, UString* s,
	  unknown_unicode unknown_u_conv,
          unknown_unicode u2s_hook)
{
  int i;

  UStr_alloc(s);

  for (i = 0; i < len - 1; i += 2) {
    VALUE sv;
    unsigned long uchar = u[i] | (u[i+1] << 8);

    if (uchar >= 0xd800 && uchar < 0xdc00 && /* high surrogate */
        i < len - 3) {
      unsigned long low = u[i+2] | (u[i+3] << 8);
      /* Both bounds must hold; with "||" every following code unit was
         consumed as if it were a low surrogate. */
      if (low >= 0xdc00 && low < 0xe000) { /* low surrogate */
        uchar = (((uchar & 0x3ff) << 10) | (low & 0x3ff)) + 0x10000;
        i += 2; /* u + i now addresses the low surrogate */
      }
    }

    if (u2s_hook &&
        (sv = u2s_hook(uchar)) != Qnil) {
      if (TYPE(sv) != T_STRING) {
        UStr_free(s);
        rb_exc_raise(sv);
      }
      if (RSTRING_LEN(sv) == 0) {
        /* An empty hook result means "no mapping". */
        u2s_add_unknown(s, unknown_u_conv, uchar);
      }
      else {
        UStr_addChars(s, (unsigned char*)(RSTRING_PTR(sv)), RSTRING_LEN(sv));
      }
    }
    else {
#ifdef USE_WIN32API
      unsigned char str[3];
      int slen = 0;

      /* The API is given exactly one UTF-16 unit, so it cannot convert a
         combined surrogate pair; such code points are left with slen == 0
         and reported as unknown with their full value. */
      if (uchar < 0x10000) {
        slen = WideCharToMultiByte(SJIS_CODEPAGE, 0,
                                   (LPCWSTR)(u + i), 1,
                                   (LPSTR)str, sizeof(str),
                                   "\xff", NULL);
      }

      /* A lone 0xff byte is the default character passed to the API
         above, i.e. the character had no mapping. */
      if (slen == 1 && str[0] != (unsigned char)'\xff') {
        UStr_addChar(s, str[0]);
      }
      else if (slen == 2) {
        UStr_addChar2(s, str[0], str[1]);
      }
#else
      unsigned short schar = 0;

      /* Code points beyond the table keep schar == 0 and end up in the
         unknown-character branch below. */
      if (uchar < sizeof(u2s_tbl)/sizeof(unsigned short))
        schar = u2s_tbl[uchar];
      if (schar > 0 && schar < 128) { /* ASCII */
        UStr_addChar(s, schar);
      }
      else if (schar > 0xa0 && schar <= 0xdf) { /* JIS X 0201 kana */
        UStr_addChar(s, schar);
      }
      else if (schar >= 0x8140 && schar != 0xffff) { /* JIS X 0208 */
        UStr_addChar2(s, schar >> 8, schar & 0xff);
      }
#endif /* USE_WIN32API */
      else {
        u2s_add_unknown(s, unknown_u_conv, uchar);
      }
    }
  }

  return s->len;
}

#endif /* USE_SJIS */