File: wordborder.h

package info (click to toggle)
anthy 9100h-16
  • links: PTS, VCS
  • area: main
  • in suites: wheezy
  • size: 17,528 kB
  • sloc: ansic: 26,140; sh: 8,573; lisp: 1,264; makefile: 208
file content (210 lines) | stat: -rw-r--r-- 5,039 bytes parent folder | download | duplicates (5)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
/* Data used for detecting segment (bunsetsu) boundaries */
#ifndef _wordborder_h_included_
#define _wordborder_h_included_


#include <anthy/dic.h>
#include <anthy/alloc.h>
#include <anthy/segclass.h>
#include <anthy/depgraph.h>

struct splitter_context;

/*
 * How the usability of a meta_word is checked.
 */
enum mw_check {
  MW_CHECK_NONE     = 0, /* do nothing */
  MW_CHECK_SINGLE   = 1, /* mw->wl is absent, or wl is usable */
  MW_CHECK_BORDER   = 2,
  MW_CHECK_WRAP     = 3,
  MW_CHECK_OCHAIRE  = 4,
  MW_CHECK_NUMBER   = 5,
  MW_CHECK_COMPOUND = 6
};

/*
 * Represents one position within the input string and holds the set of
 * meta_word and word_list entries that start at that position.
 */
struct char_node {
  /* NOTE(review): presumably the longest length found starting here -- confirm */
  int max_len;
  /* meta_word entries starting at this position */
  struct meta_word *mw;
  /* word_list entries starting at this position (chained via word_list.next) */
  struct word_list *wl;
};

/*
 * Information about the context, such as its independent words.
 * Built when the conversion key is pressed for the first time.
 */
struct word_split_info_cache {
  /* NOTE(review): presumably one char_node per character position -- confirm */
  struct char_node *cnode;

  /* Information used while the cache is being built */
  /* Used when searching for suffixes */
  int *seq_len;/* length of the longest word starting at that position */
  /* Used when searching for prefixes */
  int *rev_seq_len;/* length of the longest word ending at that position */
  /* Copy of the segment boundaries taken from the context */
  int *seg_border;
  /* Segment class that scored best in the search */
  enum seg_class* best_seg_class;
  /* NOTE(review): original comment is empty; presumably the best meta_word found -- confirm */
  struct meta_word **best_mw;
  /* Allocators (names suggest one for meta_word, one for word_list -- confirm) */
  allocator MwAllocator, WlAllocator;
};

/*
 * State of a meta_word.
 */
enum mw_status {
  MW_STATUS_NONE          = 0,
  MW_STATUS_WRAPPED       = 1, /* the contents are held in mw->mw1 */
  MW_STATUS_COMBINED      = 2, /* concatenation of mw->mw1 and mw->mw2 */
  MW_STATUS_COMPOUND      = 3, /* for compound words */
  MW_STATUS_COMPOUND_PART = 4, /* individual segments of a compound word
                                  joined and viewed as a single segment */
  MW_STATUS_OCHAIRE       = 5  /* taken from OCHAIRE learning */
};



/*
 * One row of the table describing how processing differs by metaword
 * type (metaword.c).
 */
struct metaword_type_tab_ {
  enum metaword_type type;
  const char *name;
  enum mw_status status;
  enum mw_check check;
};

/* The table itself; its size is not visible from this header. */
extern struct metaword_type_tab_ anthy_metaword_type_tab[];

/*
 * Parts of a word_list. NR_PARTS sizes word_list.part[]; the PART_*
 * values are presumably the indices into it.
 * 0: prefix
 * 1: independent word (core)
 * 2: suffix
 * 3: dependent word (per PART_DEPWORD; not listed in the original comment)
 */
#define NR_PARTS 4
#define PART_PREFIX 0
#define PART_CORE 1
#define PART_POSTFIX 2
#define PART_DEPWORD 3

/* Description of a single part of a word_list (element type of word_list.part[]). */
struct part_info {
  /* Length of this part (from is presumably its start position -- confirm) */
  int from, len;
  /* Part of speech */
  wtype_t wt;
  seq_ent_t seq;
  /* Frequency */
  int freq;
  /* Dependent-word class */
  enum dep_class dc;
};

/*
 * word_list: something that forms a segment (bunsetsu).
 * It contains a prefix, an independent word, a suffix and dependent words.
 */
struct word_list {
  /**/
  int from, len; /* the segment as a whole */
  int is_compound; /* whether this is a compound word */

  /**/
  int dep_word_hash;
  int mw_features;
  /**/
  enum seg_class seg_class;
  enum constraint_stat can_use; /* does not straddle a segment boundary */

  /* Information wanted for miscellaneous processing, not for obtaining kanji */
  int head_pos; /* part of speech used for the lattice search */
  int tail_ct; /* conjugation form used when combining meta_words */

  /**/
  int last_part;
  struct part_info part[NR_PARTS];

  /* Information from the time this word_list was created */
  int node_id; /* id of the node where the dependent-word graph search starts */

  /* List of word_lists that share the same from */
  struct word_list *next;
};


/* splitter.c */
/* Debug flags; the values are distinct powers of two. */
#define SPLITTER_DEBUG_NONE 0
/* Show word lists */
#define SPLITTER_DEBUG_WL 1
/* Show metawords */
#define SPLITTER_DEBUG_MW 2
/* Show lattice nodes */
#define SPLITTER_DEBUG_LN 4
/* Show the part of speech that the independent word matched */
#define SPLITTER_DEBUG_ID 8
/* NOTE(review): original comment is empty; presumably candidate-related -- confirm */
#define SPLITTER_DEBUG_CAND 16

/* NOTE(review): presumably returns an OR of the SPLITTER_DEBUG_* values -- confirm in splitter.c */
int anthy_splitter_debug_flags(void);


/* defined in wordseq.c */
/* Handles the connections that follow the independent word */
void anthy_scan_node(struct splitter_context *sc,
		     struct word_list *wl,
		     xstr *follow, int node);
/* NOTE(review): presumably maps a node name to its id; the failure value is not visible here */
int anthy_get_node_id_by_name(const char *name);
/* NOTE(review): presumably set-up and tear-down of the dependent-word table; confirm the meaning of the int result */
int anthy_init_depword_tab(void);
void anthy_quit_depword_tab(void);

/* depgraph.c */
/* NOTE(review): going by the names, the rule count and a by-index rule fetch; confirm in depgraph.c */
int anthy_get_nr_dep_rule(void);
void anthy_get_nth_dep_rule(int nth, struct wordseq_rule *rule);

/* defined in wordlist.c */
/* NOTE(review): behaviour is not visible from this header; see wordlist.c */
void anthy_commit_word_list(struct splitter_context *sc,
                            struct word_list *wl);
struct word_list *anthy_alloc_word_list(struct splitter_context *sc);
void anthy_print_word_list(struct splitter_context *sc,
                           struct word_list *wl);
void anthy_make_word_list_all(struct splitter_context *sc);

/* defined in metaword.c */
/* NOTE(review): behaviour is not visible from this header; see metaword.c */
void anthy_commit_meta_word(struct splitter_context *sc,
                            struct meta_word *mw);
void anthy_make_metaword_all(struct splitter_context *sc);
void anthy_print_metaword(struct splitter_context *sc,
                          struct meta_word *mw);

void anthy_mark_border_by_metaword(struct splitter_context *sc,
                                   struct meta_word *mw);


/* defined in evalborder.c */
/* NOTE(review): the three int arguments are unnamed in this header; see evalborder.c for their meaning */
void anthy_eval_border(struct splitter_context *sc, int, int, int);

/* defined at lattice.c */
/* NOTE(review): presumably marks segment borders between positions from and to; confirm whether the bounds are inclusive */
void anthy_mark_borders(struct splitter_context *sc, int from, int to);

/* defined at seg_class.c */
/* NOTE(review): presumably fills in wl->seg_class; confirm in seg_class.c */
void anthy_set_seg_class(struct word_list *wl);

/* Parts of speech (initialized in anthy_init_splitter) */
extern wtype_t anthy_wtype_noun;
extern wtype_t anthy_wtype_name_noun;
extern wtype_t anthy_wtype_num_noun;
extern wtype_t anthy_wtype_prefix;
extern wtype_t anthy_wtype_num_prefix;
extern wtype_t anthy_wtype_num_postfix;
extern wtype_t anthy_wtype_name_postfix;
extern wtype_t anthy_wtype_sv_postfix;
extern wtype_t anthy_wtype_a_tail_of_v_renyou;
extern wtype_t anthy_wtype_v_renyou;
extern wtype_t anthy_wtype_noun_tail;/* NOTE(review): original gives a quoted example ending in "te"; the text is not fully recoverable */
extern wtype_t anthy_wtype_n1;
extern wtype_t anthy_wtype_n10;

#endif