File: splitter.h

package info (click to toggle)
anthy 6300d-2
  • links: PTS
  • area: main
  • in suites: sarge
  • size: 7,400 kB
  • ctags: 2,270
  • sloc: ansic: 17,009; sh: 13,554; lisp: 1,039; makefile: 252; ruby: 212; perl: 10
file content (118 lines) | stat: -rw-r--r-- 2,854 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
/* Interface of the splitter module */
#ifndef _splitter_h_included_
#define _splitter_h_included_

#include <dic.h>
#include <xstr.h>
#include <wtype.h>
#include <segclass.h>

/*
 * Parameters.
 * NOTE(review): the roles below are inferred from the names only --
 * confirm at the use sites.
 */
#define RATIO_BASE     256      /* presumably the base (denominator) for ratio values */
#define OCHAIRE_SCORE  5000000  /* presumably the score used for MW_OCHAIRE meta words */

/**
 * Context of the splitter.
 * Valid from the first border setup until the anthy_context is released.
 *
 * NOTE(review): the comments in this block are translated from the
 * original Japanese, which is mis-encoded in this copy of the file --
 * confirm against upstream.
 */
struct splitter_context {
  /** Structure used internally by the splitter. */
  struct word_split_info_cache *word_split_info;

  /* NOTE(review): presumably the number of entries in ce -- confirm. */
  int char_count;

  /* Per-character bookkeeping. */
  struct char_ent {
    xchar *c;
    int seg_border;
    /* Length of the segment that started at this position during the
     * first segmentation, if there was one. */
    int initial_seg_len;
    enum seg_class best_seg_class;
    /* The metaword that should be used with the highest priority. */
    struct meta_word *best_mw;
  } *ce;
};

/*
 * State of a constraint check.
 * (Comment translated from the original Japanese.)
 */
enum constraint_stat {
  unchecked,  /* NOTE(review): presumably "not examined yet" -- confirm */
  ok,
  ng          /* NOTE(review): presumably "no good", i.e. check failed -- confirm */
};

/*
 * Kinds of metaword.
 * Original author's note: for now types are simply added as needed and
 * will be classified once that causes problems.
 *
 * NOTE(review): the comments in this block are translated from the
 * original Japanese, which is mis-encoded in this copy of the file --
 * confirm against upstream.  The enumerator order must not change.
 */
enum metaword_type {
  MW_DUMMY,             /* dummy: has no seginfo */
  MW_SINGLE,            /* contains zero or one wordlist */
  /* contains one other metaword, e.g. metaword + punctuation;
   * seginfo is taken from mw1 */
  MW_WRAP,
  MW_COMPOUND_HEAD,     /* head of a compound word */
  MW_COMPOUND,          /* for compound words */
  MW_COMPOUND_LEAF,     /* one segment of a compound word */
  /* the individual segments inside a compound word joined together and
   * treated as a single segment */
  MW_COMPOUND_PART,
  MW_NAMEPAIR,          /* pair of family name and given name */
  MW_V_RENYOU_A,        /* verb continuative form + adjective */
  MW_V_RENYOU_NOUN,     /* verb continuative form + noun */
  MW_NUMBER,            /* number */
  MW_NOUN_NOUN_PREFIX,  /* (no comment in the original) */
  MW_OCHAIRE,           /* (no comment in the original) */
  MW_SENTENCE,          /* subject-predicate relation */
  MW_MODIFIED,          /* modifier-modified relation */
  MW_END                /* NOTE(review): presumably an end marker, not a real type -- confirm */
};

/*
 * meta_word: the object that the border search operates on.
 * Besides the kind that contains a single word_list there are several
 * other kinds (see enum metaword_type).
 *
 * NOTE(review): the comments in this block are translated from the
 * original Japanese, which is mis-encoded in this copy of the file --
 * confirm against upstream.
 */
struct meta_word {
  int from;
  int len;
  int weak_len;
  int score;
  enum seg_class seg_class;
  /* number of metawords */
  int mw_count;
  /* does not straddle a segment border */
  enum constraint_stat can_use;
  enum metaword_type type;
  struct word_list *wl;
  struct meta_word *mw1;
  struct meta_word *mw2;
  xstr cand_hint;

  int nr_parts;

  /* list links */
  struct meta_word *next;
  struct meta_word *composed;

  /* the following is used when committing the structure */
  struct meta_word *parent;
};

/*
 * Splitter module entry points and split-context handling.
 * NOTE(review): the meaning of the int returned by anthy_init_splitter()
 * and the ownership of the pointers passed below are not visible in this
 * header -- confirm against the implementation.
 */
int anthy_init_splitter(void);
void anthy_quit_splitter(void);

void anthy_init_split_context(xstr *xs, struct splitter_context *c);

/*
 * anthy_mark_border(context, l1, l2, r1):
 * detect the segments between l1 and r1, but do not place a border
 * between l1 and l2.  (l1, l2, r1 are the from, from2, to arguments.)
 * Comment translated from the original Japanese -- confirm against upstream.
 */
void anthy_mark_border(struct splitter_context *c,
                       int from, int from2, int to);

/* NOTE(review): presumably mw and len are arrays of nr entries -- confirm. */
void anthy_commit_border(struct splitter_context *c, int nr,
                         struct meta_word **mw, int *len);

void anthy_release_split_context(struct splitter_context *c);

/*
 * Retrieve information about the detected segments.
 * (Comment translated from the original Japanese.)
 * NOTE(review): presumably anthy_get_nr_metaword() counts the metawords
 * for the range given by from/len and anthy_get_nth_metaword() returns
 * the nth of them -- confirm against the implementation.
 */
int anthy_get_nr_metaword(struct splitter_context *c, int from, int len);
struct meta_word *anthy_get_nth_metaword(struct splitter_context *c,
                                         int from, int len, int nth);



#endif