File: iconv_hook_eucjp.c

package info (click to toggle)
libapache-mod-encoding 0.0.20021209-10
  • links: PTS
  • area: main
  • in suites: jessie, jessie-kfreebsd, lenny, squeeze, wheezy
  • size: 2,668 kB
  • ctags: 284
  • sloc: ansic: 94,392; sh: 13,778; makefile: 113
file content (144 lines) | stat: -rw-r--r-- 3,388 bytes parent folder | download | duplicates (5)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
/* -*- mode: c -*-
 *
 * $Id: iconv_hook_eucjp.c,v 1.4 2002/06/10 13:57:52 tai Exp $
 * Author: KAJIKI Yoshihiro <kajiki@ylug.org>
 * based on the 'iconv_hook_mssjis.c' by Kunio Miyamoto (wakatono@todo.gr.jp)
 */

#include <sys/types.h>
#include <dirent.h>
#include <errno.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include "iconv_hook.h"

/* EUC-JP single-shift lead bytes. */
#define SS2 (0x8E)	/* introduces a JIS X 0201 character */
#define SS3 (0x8F)	/* introduces a JIS X 0212 character */

/*
 * Lead-byte classifiers.  The argument is narrowed to unsigned char before
 * the comparison, so plain (possibly signed) char values are handled.
 */
#define is_ascii(c)   (((unsigned char) (c)) <= 0x7F)	/* ISO 646 */
#define is_kanji(c)   (((unsigned char) (c)) >= 0xA0)	/* JIS X 0208 */
#define is_hankana(c) (((unsigned char) (c)) == SS2)	/* JIS X 0201 */
#define is_hojyo(c)   (((unsigned char) (c)) == SS3)	/* JIS X 0212 */

/*
 * Converter that eucjp_iconv() hands its Shift_JIS intermediate buffer to.
 * It is not defined in this file; presumably it lives in
 * iconv_hook_mssjis.c, which this module is based on -- TODO confirm.
 */
size_t mssjis_iconv(iconv_t cd,
            char **srcbuf, size_t *srclen, char **outbuf, size_t *outlen);

/*
 * Width in bytes of the EUC-JP character that starts with lead byte c:
 * 1 for ASCII, 2 for a JIS X 0208 or SS2-prefixed character, 3 for an
 * SS3-prefixed character, and 0 when c is none of those.
 */
static size_t skip_bytes(char c)
{
  size_t width = 0;

  if (is_ascii(c))
    width = 1;
  else if (is_hankana(c) || is_kanji(c))
    width = 2;
  else if (is_hojyo(c))
    width = 3;

  return width;
}

/*
 * eucjp_iconv_open()
 * by Kunio Miyamoto (wakatono@todo.gr.jp)
 * and KAJIKI Yoshihiro <kajiki@ylug.org>
 *
 * iconv_open()-style entry point kept for interface compatibility.
 * Nothing is allocated: the call only checks the requested conversion.
 * The names are compared by prefix.  When ienc starts with "EUC-JP",
 * "UJIS" or "EUCJP" and oenc starts with "UTF-8", the fixed descriptor
 * (iconv_t)1 is returned; every other combination yields (iconv_t)-1.
 */

static iconv_t
eucjp_iconv_open(const char *oenc, const char *ienc) {
  /* Source encoding first: reject anything that is not an EUC-JP alias. */
  if (strncmp(ienc, "EUC-JP", 6) != 0
      && strncmp(ienc, "UJIS", 4) != 0
      && strncmp(ienc, "EUCJP", 5) != 0)
    return (iconv_t)-1;

  /* Only UTF-8 is accepted as the target encoding. */
  if (strncmp(oenc, "UTF-8", 5) != 0)
    return (iconv_t)-1;

  return (iconv_t)1;
}

/*
 * eucjp_iconv_close()
 * by Kunio Miyamoto (wakatono@todo.gr.jp)
 * and KAJIKI Yoshihiro <kajiki@ylug.org>
 *
 * iconv_close()-style entry point kept for interface compatibility.
 * There is nothing to release, so the descriptor is ignored and the call
 * always returns 0.
 */

static int
eucjp_iconv_close(iconv_t cd) {
  (void)cd;	/* descriptor carries no state */
  return 0;
}

/* eucjp_iconv()
 * by KAJIKI Yoshihiro <kajiki@ylug.org>
 *  EUC-JP code to UTF-8 via Microsoft ShiftJIS using mssjis_iconv().
 * This is experimental code for processing EUC-JP.
 *
 * The input is first rewritten as Shift_JIS into a temporary heap buffer,
 * which is then passed to mssjis_iconv() together with outbuf/outlen.
 *
 * Parameters (iconv()-style, all in/out):
 *   cd      descriptor, forwarded to mssjis_iconv() unchanged
 *   srcbuf  address of the pointer to the EUC-JP input
 *   srclen  address of the number of input bytes available at *srcbuf
 *   outbuf  address of the output pointer, forwarded to mssjis_iconv()
 *   outlen  address of the output space left, forwarded to mssjis_iconv()
 *
 * Returns 0 without doing anything when one of the four arguments is NULL
 * or when *srcbuf is NULL.  Otherwise returns the result of mssjis_iconv();
 * in that case *srcbuf is left at the byte where scanning stopped (end of
 * input or a NUL byte) and *srclen is set to 0, as the code has always done
 * (presumably so callers looping on the remaining length terminate).
 *
 * On a failure detected here the function returns (size_t)-1, points
 * *srcbuf at the offending lead byte, stores the number of unread bytes in
 * *srclen, and sets errno:
 *   EILSEQ  unsupported lead byte (JIS X 0212 is not supported) or a
 *           trail byte outside the valid range
 *   EINVAL  the input ends in the middle of a two-byte character
 *   ENOMEM  the temporary buffer could not be allocated
 */
 
static size_t
eucjp_iconv(iconv_t cd,
	    char **srcbuf, size_t *srclen, char **outbuf, size_t *outlen) {
  unsigned char *sjis, *dst;
  unsigned char *src, *end;
  unsigned char ch, cl;
  char *cursor;
  size_t sjis_len;
  size_t ret;
  int err;

  if (! (srcbuf && srclen && outbuf && outlen))
    return 0;
  if (*srcbuf == NULL)
    return 0;

  /* Guard the "+ 1" below against wrapping around to zero. */
  if (*srclen == (size_t)-1) {
    errno = ENOMEM;
    return (size_t)-1;
  }

  /*
   * Every EUC-JP sequence accepted below produces at most as many SJIS
   * bytes as it consumes, so input length plus the terminator is enough.
   */
  sjis = malloc(*srclen + 1);
  if (sjis == NULL) {
    errno = ENOMEM;
    return (size_t)-1;
  }

  /* translate EUC-JP into SJIS */
  src = (unsigned char *)*srcbuf;
  end = src + *srclen;
  dst = sjis;
  while (src < end && *src != '\0') {
    ch = *src;
    if (is_ascii(ch)) {
      *dst++ = ch;
      src++;
      continue;
    }

    if (!is_kanji(ch) && !is_hankana(ch)) {
      /* We don't support JIS X 0212 */
      err = EILSEQ;
      goto fail;
    }

    /* Two-byte character: the trail byte must lie inside the input. */
    if (end - src < 2) {
      err = EINVAL;
      goto fail;
    }
    cl = src[1];

    if (is_kanji(ch)) {
      /* Trail bytes of a two-byte EUC-JP character are 0xA1..0xFE. */
      if (cl < 0xA1 || cl > 0xFE) {
        err = EILSEQ;
        goto fail;
      }
      /* Lead byte: two EUC rows share one SJIS lead byte. */
      *dst++ = (unsigned char)(((ch - 0x5f) / 2) ^ 0xA0);
      /* Trail byte: even rows use the upper half, odd rows the lower
       * half, which skips the 0x7F slot. */
      if (!(ch & 1))
        *dst++ = (unsigned char)(cl - 0x02);
      else if (cl < 0xE0)
        *dst++ = (unsigned char)(cl - 0x61);
      else
        *dst++ = (unsigned char)(cl - 0x60);
    } else {
      /* SS2 + kana byte: SJIS stores the kana byte on its own. */
      if (cl < 0xA0 || cl > 0xDF) {
        err = EILSEQ;
        goto fail;
      }
      *dst++ = cl;
    }
    src += 2;
  }
  *dst = '\0';
  sjis_len = (size_t)(dst - sjis);

  /*
   * mssjis_iconv() receives a scratch cursor and the real SJIS length:
   * it may move the pointer it is given (its definition is not visible
   * here), and free() must get the address malloc() returned.
   */
  cursor = (char *)sjis;
  ret = mssjis_iconv(cd, &cursor, &sjis_len, outbuf, outlen);
  free(sjis);

  *srcbuf = (char *)src;
  *srclen = 0;
  return ret;

fail:
  free(sjis);
  *srcbuf = (char *)src;
  *srclen = (size_t)(end - src);
  errno = err;	/* set last so free() cannot disturb it */
  return (size_t)-1;
}

/*
 * Hook table for the EUC-JP converter, handed out by
 * iconv_hook_eucjp_init().  The initializer is positional: conversion,
 * open and close handlers, in that order.  The matching field layout is
 * declared outside this file (presumably in iconv_hook.h -- TODO confirm).
 */
static iconv_hook_module iconv_hook_eucjp = {
  eucjp_iconv,
  eucjp_iconv_open,
  eucjp_iconv_close,
};

/*
 * Return the address of this file's static EUC-JP hook table.  The table
 * has static storage, so the caller must not free it.
 */
iconv_hook_module *
iconv_hook_eucjp_init(void) {
  iconv_hook_module *module = &iconv_hook_eucjp;

  return module;
}