File: segclass.c

package info (click to toggle)
anthy 1%3A0.4-2
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, bullseye
  • size: 23,596 kB
  • sloc: ansic: 24,444; sh: 4,186; lisp: 1,265; makefile: 238
file content (118 lines) | stat: -rw-r--r-- 2,736 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
#include <string.h>

#include <anthy/splitter.h>
#include <anthy/wtype.h>
#include <anthy/segclass.h>
#include "wordborder.h"

const char *seg_class_tab[] = {
  "H",		/* Head of sentence: Buntou */
  "T",		/* Tail of sentence: Bunmatsu */
  "B",		/* Segment: Bunsetsu */
  "C",		/* Setsuzokugo */
  "Nk",		/* Meishi+Kakujoshi */
  "Ne",		/* Meishi+Shuutan */
  "Vf",		/* Doushi+Fuzokugo */
  "Ve",		/* Doushi+Shuutan */
  "A",		/* Keiyoushi */
  "AJV",	/* Keiyoudoushi */
  "YM",		/* RenyouShuushoku */
  "TM",		/* RentaiShuushoku */
  "N",		/* Meishi */
  "Nf",		/* Meishi+Fuzokugo */
  "Ny",		/* Meishi+Renyou */
  "Vy",		/* Doushi+Renyou */
  "Vt",		/* Doushi+Rentai */
};

void
anthy_set_seg_class(struct word_list* wl)
{
  int head_pos;
  enum dep_class dc;
  enum seg_class seg_class;

  if (!wl) return;

  head_pos = wl->head_pos;
  dc = wl->part[PART_DEPWORD].dc;
  seg_class = SEG_HEAD;

  if (wl->part[PART_CORE].len == 0) {
    seg_class = SEG_BUNSETSU;
  } else {
    switch (head_pos) {
    case POS_NOUN:
    case POS_NUMBER:
      /* BREAK THROUGH */
    case POS_N2T:
      if (dc == DEP_RAW) {
	seg_class = SEG_MEISHI;
      } else if (dc == DEP_END) {
	seg_class = SEG_MEISHI_SHUTAN;
      } else if (dc == DEP_RENYOU) {
	seg_class = SEG_MEISHI_RENYOU;
      } else if (dc == DEP_KAKUJOSHI) {
	seg_class = SEG_MEISHI_KAKUJOSHI;
      } else {
	seg_class = SEG_MEISHI_FUZOKUGO;
      }
      break;
    case POS_V:
      if (dc == DEP_RAW) {
	seg_class = SEG_BUNSETSU;
      } else if (dc == DEP_END) {
	seg_class = SEG_DOUSHI_SHUTAN;
      } else if (dc == DEP_RENYOU) {
	seg_class = SEG_DOUSHI_RENYOU;
      } else if (dc == DEP_RENTAI) {
	seg_class = SEG_DOUSHI_RENTAI;
      } else {
	seg_class = SEG_DOUSHI_FUZOKUGO;
      }
      break;
    case POS_D2KY:
      /* BREAK THROUGH */
    case POS_A:
      seg_class = SEG_KEIYOUSHI;
      if (dc == DEP_RENYOU) {
	seg_class = SEG_RENYOU_SHUSHOKU;
      } else if (dc == DEP_RENTAI) {
	seg_class = SEG_RENTAI_SHUSHOKU;
      }
      break;
    case POS_AJV:
      seg_class = SEG_KEIYOUDOUSHI;
      if (dc == DEP_RENYOU) {
	seg_class = SEG_RENYOU_SHUSHOKU;
      } else if (dc == DEP_RENTAI) {
	seg_class = SEG_RENTAI_SHUSHOKU;
      }
      break;
    case POS_AV:
      seg_class = SEG_RENYOU_SHUSHOKU;
      break;
    case POS_ME:
      seg_class = SEG_RENTAI_SHUSHOKU;
      break;
    case POS_CONJ:
      seg_class = SEG_SETSUZOKUGO;
      break;
    case POS_OPEN:
      seg_class = SEG_BUNSETSU;
      break;
    case POS_CLOSE:
      seg_class = SEG_BUNSETSU;
      break;
    default:
      seg_class = SEG_MEISHI;
      break;
    }
  }
  wl->seg_class = seg_class;
}

const char* anthy_seg_class_sym(enum seg_class sc)
{
  return seg_class_tab[sc];
}