File: u2e.c

package info (click to toggle)
ruby-uconv 0.6.1-3
  • links: PTS, VCS
  • area: main
  • in suites: bullseye, sid
  • size: 4,716 kB
  • ctags: 197
  • sloc: ansic: 161,247; ruby: 44,420; makefile: 2
file content (100 lines) | stat: -rw-r--r-- 2,628 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
/*
 * Unicode Conversion Library (UTF-16 to EUC-JP)
 * 1997-2002 by yoshidam
 *
 */

#ifdef USE_EUC

#include <string.h>
#include <stdlib.h>
#include "uconv.h"
#include "u2e.h"
#include "ustring.h"
#include "ruby.h"

/*
 * Fallback string accessors, defined only when no earlier header has
 * already defined RSTRING_PTR.  They read the ptr/len members of
 * RSTRING(s) directly.
 * NOTE(review): presumably for older Ruby headers that lack these
 * macros -- confirm against the supported Ruby versions.
 */
#ifndef RSTRING_PTR
#  define RSTRING_PTR(s) (RSTRING(s)->ptr)
#  define RSTRING_LEN(s) (RSTRING(s)->len)
#endif

/* Byte written by u2e_conv2() for a character it cannot convert when no
 * unknown_u_conv callback was supplied. */
#define REPLACEMENT_CHAR '?'

/*
 * Convert a UTF-16 byte sequence to EUC-JP.
 *
 *   u               input bytes; each 16-bit code unit is read low byte
 *                   first (little-endian)
 *   len             length of u in bytes; a trailing odd byte is ignored
 *   e               output string; passed to UStr_alloc() before any
 *                   output is appended
 *   unknown_u_conv  optional callback (may be NULL) called with the code
 *                   point of a character that has no mapping.  It must
 *                   return a String whose bytes are appended to e; any
 *                   other return value is raised with rb_exc_raise()
 *                   after e has been released with UStr_free().  When
 *                   NULL, REPLACEMENT_CHAR is appended instead.
 *   u2e_hook        optional callback (may be NULL) consulted before the
 *                   built-in table.  Qnil means "no opinion" and falls
 *                   through to the table; a non-empty String is appended
 *                   verbatim; an empty String marks the character as
 *                   unknown; any other value is raised like above.
 *
 * A high surrogate immediately followed by a low surrogate is combined
 * into a single supplementary code point before conversion.  Unpaired
 * surrogates are passed on unchanged as individual code units.
 *
 * Returns e->len after the conversion.
 */
int
u2e_conv2(const unsigned char* u, int len, UString* e,
	  unknown_unicode unknown_u_conv,
          unknown_unicode u2e_hook)
{
  int i;

  UStr_alloc(e);

  for (i = 0; i < len - 1; i += 2) {
    VALUE ev;
    unsigned long uchar = u[i] | (u[i+1] << 8);

    if (uchar >= 0xd800 && uchar < 0xdc00 && /* high surrogate */
        i < len - 3) {                       /* another full unit follows */
      unsigned long low = u[i+2] | (u[i+3] << 8);
      /* Both bounds must hold; the former "||" was always true and
         swallowed whatever code unit followed a high surrogate. */
      if (low >= 0xdc00 && low < 0xe000) { /* low surrogate */
        uchar = (((uchar & 0x3ff) << 10) | (low & 0x3ff)) + 0x10000;
        i += 2; /* consume the low surrogate as well */
      }
    }

    if (u2e_hook &&
        (ev = u2e_hook(uchar)) != Qnil) {
      if (TYPE(ev) != T_STRING) {
        /* Release the partial output first: rb_exc_raise() does not
           return. */
        UStr_free(e);
        rb_exc_raise(ev);
      }
      if (RSTRING_LEN(ev) == 0) {
        /* Empty hook result: treat the character as unknown. */
        if (unknown_u_conv != NULL) {
          VALUE ret = unknown_u_conv(uchar);
          if (TYPE(ret) != T_STRING) {
            UStr_free(e);
            rb_exc_raise(ret);
          }
          UStr_addChars(e, (unsigned char*)(RSTRING_PTR(ret)), RSTRING_LEN(ret));
        }
        else {
          UStr_addChar(e, REPLACEMENT_CHAR);
        }
      }
      else {
        UStr_addChars(e, (unsigned char*)(RSTRING_PTR(ev)), RSTRING_LEN(ev));
      }
    }
    else {
      unsigned short echar = 0;

      /* Code points beyond the table keep echar == 0 and end up in the
         "unknown" branch below. */
      if (uchar < sizeof(u2e_tbl)/sizeof(unsigned short))
          echar = u2e_tbl[uchar];
      if (echar > 0 && echar < 128) { /* ASCII */
        UStr_addChar(e, echar);
      }
      else if (echar > 0xa0 && echar <= 0xdf) { /* JIS X 0201 kana */
        UStr_addChar2(e, 0x8e, echar & 0xff);
      }
      else if (echar >= 0x2121 && echar <= 0x6d63) { /* JIS X 0212 */
        UStr_addChar3(e, 0x8f, (echar >> 8) | 0x80, (echar & 0xff) | 0x80);
      }
      else if (echar >= 0xa0a0 && echar != 0xffff) { /* JIS X 0208 */
        UStr_addChar2(e, echar >> 8, echar & 0xff);
      }
      else { /* Unknown char */
        if (unknown_u_conv != NULL) {
          VALUE ret = unknown_u_conv(uchar);
          if (TYPE(ret) != T_STRING) {
            UStr_free(e);
            rb_exc_raise(ret);
          }
          UStr_addChars(e, (unsigned char*)(RSTRING_PTR(ret)), RSTRING_LEN(ret));
        }
        else {
          UStr_addChar(e, REPLACEMENT_CHAR);
        }
      }
    }
  }

  return e->len;
}

#endif /* USE_EUC */