File: kanaid.h

package info (click to toggle)
lookup 1.08b-10
  • links: PTS
  • area: main
  • in suites: etch, etch-m68k, lenny
  • size: 1,112 kB
  • ctags: 1,306
  • sloc: ansic: 12,638; makefile: 245; perl: 174; sh: 53
file content (108 lines) | stat: -rw-r--r-- 4,539 bytes parent folder | download | duplicates (9)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
#ifndef __KANAID_H__ /* file wrapper */
#define __KANAID_H__
/*
 * Jeffrey Friedl
 * Omron Corporation
 * Nagaokakyoshi, Japan			617
 *
 * jfriedl@nff.ncl.omron.co.jp
 *
 * This work is placed under the terms of the GNU General Purpose License
 * (the "GNU Copyleft").
 *
 * Oct 1993
 *
 * Routines (a'la ctype) to give info about EUC kana characters.
 * Given the two-byte character as the two bytes HI and LO:
 *    IS_KATAKANA(HI, LO)   -- true if character is katakana
 *    IS_HIRAGANA(HI, LO)   -- true if character is hiragana
 *    IS_KANA(HI, LO)	    -- true if either of the above.
 *    IS_DASH(HI, LO)	    -- true if the character is the EUC dash, bytes 0241 0274 (which isn't kana)
 *
 *    KANA_ID(HI, LO)	    -- VALID ONLY IF IS_KANA(HI,LO) IS TRUE,
 *                             returns the KID_* codes defined below which
 *			       describe the character.
 *
 * Warning: these are macros so arguments shouldn't have side effects.
 */
/* Version stamp for kanaid: the value 100 stands for version 1.00. */
#define kanaid_version 100 /* 1.00 */

/*
 * Flags returned by KANA_ID(Highbyte, Lowbyte) have the following bits set
 */
#define KID_A            0x00000001        /* */
#define KID_I            0x00000002        /* */
#define KID_U            0x00000004        /* */
#define KID_E            0x00000008        /* */
#define KID_O            0x00000010        /* */

/* mask to nab the volwel sound */
#define KID_VSOUND     (KID_A|KID_I|KID_U|KID_E|KID_O)

#define KID_K            0x00000020        /*  */
#define KID_G            0x00000040        /*  */
#define KID_S            0x00000080        /*  */
#define KID_Z            0x00000100        /*  */
#define KID_T            0x00000200        /* ġơ */
#define KID_D            0x00000400        /* ¡šǡ */
#define KID_N            0x00000800        /* ʡˡ̡͡ */
#define KID_H            0x00001000        /* ϡҡաء */
#define KID_B            0x00002000        /* Сӡ֡١ */
#define KID_P            0x00004000        /* ѡԡסڡ */
#define KID_M            0x00008000        /* ޡߡࡢᡢ */
#define KID_Y            0x00010000        /* 䡢桢   */
#define KID_R            0x00020000        /* 顢ꡢ롢졢 */
#define KID_W            0x00040000        /*            */
#define KID_n            0x00100000        /*                  */

/* mask to nab the consonant sound */
#define KID_CSOUND (KID_K|KID_G|KID_S|KID_Z|KID_T|KID_D|KID_N| \
		    KID_H|KID_B|KID_P|KID_M|KID_Y|KID_R|KID_W|KID_n)

/*
 * Modifier bits.  None of these is included in KID_VSOUND or KID_CSOUND.
 * The example characters that originally illustrated several of them are
 * no longer readable, so the exact set of kana each one applies to has to
 * be checked against the _KID table in kanaid.c.
 */
#define KID_o            0x00080000    /* NOTE(review): meaning not recoverable from this header; confirm in kanaid.c */

#define KID_SMALL        0x00200000    /* If the character is one of the small-sized kana */
#define KID_VOWEL        0x00400000    /* If a raw vowel (a kana that is a vowel by itself) */
#define KID_KATAONLY     0x00800000    /* If char found only in katakana */
#define KID_ARCHAIC      0x01000000    /* If archaic                     */

#define KID_DUAL         0x02000000    /* NOTE(review): set for four specific kana; which ones must be confirmed in kanaid.c */


/***********************************************************************/
/***********************************************************************/

#define KID_HIRA_HI 0244    /* high byte (0xA4) for hiragana EUC */
#define KID_KATA_HI 0245    /* high byte (0xA5) for katakana EUC */

/*
 * Every macro below takes the two bytes of one EUC character.  Each
 * argument is converted to unsigned char before it is compared or used as
 * an index: all values of interest are >= 161, so a byte fetched through a
 * plain char pointer (negative where char is signed) would otherwise never
 * match, and would index _KID[] out of bounds in KANA_ID().
 *
 * These are still macros: an argument may be evaluated more than once, or
 * not at all, so arguments must not have side effects.
 */

/* True if the High/Low pair is the EUC dash, bytes 0241 0274 (not kana). */
#define IS_DASH(HighByte, LowByte)                                         \
    ((unsigned char)(HighByte) == 0241 && (unsigned char)(LowByte) == 0274)


#define _KID_START   161  /* the Low byte (0xA1) associated with _KID[0] */
#define _KID_END     246  /* the Low byte (0xF6) associated with end of _KID[] */

/* True if the bytes represent a katakana character (except dash) */
#define IS_KATAKANA(HighByte, LowByte)                                     \
    ((unsigned char)(HighByte) == KID_KATA_HI &&                           \
     (unsigned char)(LowByte)  >= _KID_START  &&                           \
     (unsigned char)(LowByte)  <= _KID_END)

/*
 * True if the bytes represent a hiragana character.
 * NOTE(review): the accepted low-byte range is the same one used for
 * katakana; whether every value in it is a real hiragana is not checked
 * here -- presumably KID_KATAONLY in the table covers that; confirm.
 */
#define IS_HIRAGANA(HighByte,LowByte)                                      \
    ((unsigned char)(HighByte) == KID_HIRA_HI &&                           \
     (unsigned char)(LowByte)  >= _KID_START  &&                           \
     (unsigned char)(LowByte)  <= _KID_END)

/* True if either katakana or hiragana (both arguments are expanded twice) */
#define IS_KANA(H,L)       (IS_KATAKANA(H,L) || IS_HIRAGANA(H,L))

/*
 * Returns the Kana ID (an unsigned long made of KID_* bits) for the given
 * character.  HighByte is accepted for symmetry but not used: the table is
 * indexed by the low byte alone, offset so that _KID_START maps to _KID[0].
 *
 *  --> only valid if IS_KANA() or IS_HIRAGANA or IS_KATAKANA is true! <--
 *
 * No range check is done here; a low byte outside _KID_START.._KID_END
 * reads outside the table.
 */
#define KANA_ID(HighByte, LowByte)                                         \
    (_KID[(unsigned char)(LowByte) - _KID_START])

extern unsigned long _KID[]; /* table of KID_* flags, defined in kanaid.c */

#endif /* file wrapper */