1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210
|
/* Data used for detecting segment (word) boundaries. NOTE(review): reconstructed from a mis-encoded comment. */
#ifndef _wordborder_h_included_
#define _wordborder_h_included_
#include <anthy/dic.h>
#include <anthy/alloc.h>
#include <anthy/segclass.h>
#include <anthy/depgraph.h>
struct splitter_context;
/*
 * Kind of check applied to a meta_word.
 * NOTE(review): the original comments in this enum were mis-encoded; the
 * notes below are reconstructed from the readable residue ("meta_word",
 * "mw->wl") and from the identifier names -- confirm against metaword.c.
 * The numeric values are spelled out explicitly and are the same ones the
 * implicit numbering produced.
 */
enum mw_check {
  /* presumably: no check at all */
  MW_CHECK_NONE = 0,
  /* presumably: depends on mw->wl (either absent or itself usable) */
  MW_CHECK_SINGLE = 1,
  MW_CHECK_BORDER = 2,
  MW_CHECK_WRAP = 3,
  MW_CHECK_OCHAIRE = 4,
  MW_CHECK_NUMBER = 5,
  MW_CHECK_COMPOUND = 6
};
/*
 * Node pairing a meta_word pointer and a word_list pointer.
 * NOTE(review): the original comment was mis-encoded; only "meta_word,
 * word_list" is readable. Presumably there is one node per character
 * position, holding the meta_words / word_lists that start there --
 * confirm against the code that fills word_split_info_cache.cnode.
 */
struct char_node {
/* NOTE(review): meaning not shown in this header; presumably the longest
 * length among the entries hanging off this node -- confirm */
int max_len;
/* meta_word(s) attached to this node */
struct meta_word *mw;
/* word_list(s) attached to this node; struct word_list chains via ->next */
struct word_list *wl;
};
/*
 * Cache of word-splitting information.
 * NOTE(review): the original comments in this struct were mis-encoded;
 * the field notes below are reconstructed from the field names and the
 * little readable residue ("context") and must be confirmed against the
 * code that builds the cache.
 */
struct word_split_info_cache {
/* pointer to char_node storage (presumably an array indexed by position) */
struct char_node *cnode;
/* NOTE(review): the original said the next two fields are used while
 * building / searching; the exact wording is unreadable */
/* presumably used when looking for what follows a position */
int *seq_len;/* presumably: length of the longest word starting at each position -- confirm */
/* presumably used when looking for what precedes a position */
int *rev_seq_len;/* presumably: length of the longest word ending at each position -- confirm */
/* presumably a copy of the segment-border data held by the context */
int *seg_border;
/* presumably the best segment class found per position -- confirm */
enum seg_class* best_seg_class;
/* NOTE(review): original comment was empty; by name, the best meta_word
 * found per position -- confirm */
struct meta_word **best_mw;
/* allocators; by name, one for meta_words (Mw) and one for word_lists (Wl) */
allocator MwAllocator, WlAllocator;
};
/*
 * State of a meta_word.
 * NOTE(review): the original comments were mis-encoded; the notes below
 * are reconstructed from the readable residue ("mw->mw1", "mw->mw2",
 * "OCHAIRE") and from the identifier names -- confirm against metaword.c.
 * The numeric values are spelled out explicitly and are the same ones the
 * implicit numbering produced.
 */
enum mw_status {
  MW_STATUS_NONE = 0,
  /* refers to mw->mw1 (presumably this meta_word wraps it) */
  MW_STATUS_WRAPPED = 1,
  /* refers to mw->mw1 and mw->mw2 (presumably their concatenation) */
  MW_STATUS_COMBINED = 2,
  /* presumably: compound word */
  MW_STATUS_COMPOUND = 3,
  /* presumably: built from individual parts of a compound word -- confirm */
  MW_STATUS_COMPOUND_PART = 4,
  /* related to OCHAIRE (presumably taken from OCHAIRE learning data) */
  MW_STATUS_OCHAIRE = 5
};
/*
 * Row type of the per-metaword-type table: pairs a metaword_type with a
 * name string, an mw_status and an mw_check.
 */
struct metaword_type_tab_ {
  enum metaword_type type;
  const char *name;
  enum mw_status status;
  enum mw_check check;
};

/* The table itself is defined elsewhere (the original comment names
 * metaword.c); its length is not visible from this header. */
extern struct metaword_type_tab_ anthy_metaword_type_tab[];
/*
 * Indices into word_list.part[], which has NR_PARTS entries:
 * 0: prefix
 * 1: core (presumably the independent word itself)
 * 2: postfix
 * 3: depword (presumably the dependent/attached words)
 */
#define NR_PARTS 4
#define PART_PREFIX 0
#define PART_CORE 1
#define PART_POSTFIX 2
#define PART_DEPWORD 3
/*
 * Description of one part of a word_list (see word_list.part[] and the
 * PART_* indices).
 * NOTE(review): the original field comments were mis-encoded; the notes
 * below are reconstructed from the field names and types -- confirm.
 */
struct part_info {
  /* extent of this part; presumably a start offset and a length */
  int from;
  int len;
  /* word type of this part (presumably its part of speech) */
  wtype_t wt;
  seq_ent_t seq;
  /* presumably a frequency value */
  int freq;
  /* dependent-word class */
  enum dep_class dc;
};
/*
 * word_list: one entry in a singly linked list (see ->next), made of up
 * to NR_PARTS parts indexed by the PART_* constants (prefix, core,
 * postfix, depword).
 * NOTE(review): the original comments were mis-encoded; the field notes
 * are reconstructed from the readable residue ("lattice", "meta_word",
 * "word_list", "nodeid", "from") and from the field names -- confirm.
 */
struct word_list {
  /* extent of the whole entry; presumably start offset and length */
  int from;
  int len;
  /* presumably non-zero when this is a compound word */
  int is_compound;

  int dep_word_hash;
  int mw_features;

  enum seg_class seg_class;
  /* presumably records whether this entry respects segment borders */
  enum constraint_stat can_use;

  /* NOTE(review): the original marked the next two fields as auxiliary
   * information; the wording is unreadable */
  int head_pos; /* used with the lattice (presumably a part of speech) */
  int tail_ct;  /* used with meta_word (presumably when joining them) */

  /* presumably a PART_* index -- confirm */
  int last_part;
  struct part_info part[NR_PARTS];

  /* information about how this word_list was made */
  int node_id; /* a node id (presumably where the dependent-word graph search starts) */

  /* next word_list in the list (presumably those sharing the same from) */
  struct word_list *next;
};
/*
 * Splitter debug flags (the original comment names splitter.c).
 * The non-zero values are distinct powers of two; presumably they are
 * OR-ed together in the value returned by anthy_splitter_debug_flags()
 * -- confirm in splitter.c.
 * NOTE(review): the original per-flag comments were mis-encoded; the
 * notes below come from the readable residue and the macro names.
 */
#define SPLITTER_DEBUG_NONE 0
/* wordlist (presumably: print word lists) */
#define SPLITTER_DEBUG_WL 1
/* metaword (presumably: print metawords) */
#define SPLITTER_DEBUG_MW 2
/* lattice node (presumably: print lattice nodes) */
#define SPLITTER_DEBUG_LN 4
/* NOTE(review): original comment unreadable; meaning of ID not visible here */
#define SPLITTER_DEBUG_ID 8
/* NOTE(review): original comment was empty; by name, candidate-related */
#define SPLITTER_DEBUG_CAND 16
/* Returns the debug flags as an int; takes no arguments. */
int anthy_splitter_debug_flags(void);
/* defined in wordseq.c */
/* NOTE(review): the original comment was mis-encoded; presumably this
 * handles what may follow the core word of wl, scanning `follow` from
 * dependency-graph node `node` -- confirm in wordseq.c. */
void anthy_scan_node(struct splitter_context *sc,
struct word_list *wl,
xstr *follow, int node);
/* Maps a node name to an int id (failure value not visible here). */
int anthy_get_node_id_by_name(const char *name);
/* Set-up / tear-down of the depword table (by name; return-value
 * convention of the init function is not visible here). */
int anthy_init_depword_tab(void);
void anthy_quit_depword_tab(void);
/* depgraph.c */
/* Number of dependency rules, and access to the n-th rule through the
 * given struct wordseq_rule pointer (presumably filled in by the callee). */
int anthy_get_nr_dep_rule(void);
void anthy_get_nth_dep_rule(int, struct wordseq_rule *);
/* defined in wordlist.c */
void anthy_commit_word_list(struct splitter_context *, struct word_list *wl);
struct word_list *anthy_alloc_word_list(struct splitter_context *);
void anthy_print_word_list(struct splitter_context *, struct word_list *);
void anthy_make_word_list_all(struct splitter_context *);
/* defined in metaword.c */
void anthy_commit_meta_word(struct splitter_context *, struct meta_word *mw);
void anthy_make_metaword_all(struct splitter_context *);
void anthy_print_metaword(struct splitter_context *, struct meta_word *);
void anthy_mark_border_by_metaword(struct splitter_context* sc,
struct meta_word* mw);
/* defined in evalborder.c */
/* NOTE(review): the three int parameters are unnamed in this prototype;
 * see evalborder.c for their meaning. */
void anthy_eval_border(struct splitter_context *, int, int, int);
/* defined in lattice.c */
void anthy_mark_borders(struct splitter_context *sc, int from, int to);
/* defined in seg_class.c */
void anthy_set_seg_class(struct word_list* wl);
/* Shared wtype_t values, declared here and defined elsewhere.
 * The original comment mentions anthy_init_splitter; presumably that is
 * where they are initialised -- confirm. */
extern wtype_t anthy_wtype_noun;
extern wtype_t anthy_wtype_name_noun;
extern wtype_t anthy_wtype_num_noun;
extern wtype_t anthy_wtype_prefix;
extern wtype_t anthy_wtype_num_prefix;
extern wtype_t anthy_wtype_num_postfix;
extern wtype_t anthy_wtype_name_postfix;
extern wtype_t anthy_wtype_sv_postfix;
extern wtype_t anthy_wtype_a_tail_of_v_renyou;
extern wtype_t anthy_wtype_v_renyou;
extern wtype_t anthy_wtype_noun_tail;/* NOTE(review): the original gave a Japanese example word here; it is no longer readable */
extern wtype_t anthy_wtype_n1;
extern wtype_t anthy_wtype_n10;
#endif
|