/*
* $Id: chalib.h,v 1.1.1.1 2007/03/13 07:40:10 masayu-a Exp $
*/
#ifndef __CHALIB_H__
#define __CHALIB_H__
#include "chadic.h"
#include "chasen.h"
/*
 * Native Windows builds (Cygwin excluded): spell the case-insensitive
 * string comparisons as stricmp / strnicmp.
 */
#if defined(_WIN32) && !defined(__CYGWIN__)
#  define strcasecmp   stricmp
#  define strncasecmp  strnicmp
#endif /* _WIN32 && !__CYGWIN__ */

/* Compile-time size limits. */
#define CHA_PATH_NUM     1024
#define CHA_INPUT_SIZE   8192   /* size of cha_lat_t.text[] and cha_seg_t.char_type[] */
#define UNDEF_HINSI_MAX  256    /* length of Cha_anno_info[] and Cha_undef_info[] */
/*
 * NOTE(review): the original comment on DIC_NUM is mis-encoded; it appears
 * to read "upper limit on the number of dictionaries usable at the same
 * time (ChaSen)" -- confirm against an intact copy of the file.
 */
#define DIC_NUM          32
#define MRPH_NUM         1024
#define PATH1_NUM        256    /* length of cha_lat_t.path_idx[] */
/*
* structures
*/
/*
 * mrph_t -- one morpheme record.
 *
 * The original header warns "don't move this order": the member sequence
 * is relied upon elsewhere, so members must not be rearranged.
 * NOTE(review): it is not visible from this header whether the warning
 * covers every member or only the leading ones -- treat all as fixed.
 */
struct _mrph_t {
    unsigned short posid;         /* presumably a part-of-speech id -- confirm */
    unsigned char  inf_type;      /* presumably inflection type -- confirm */
    unsigned char  inf_form;      /* presumably inflection form -- confirm */
    unsigned short weight;
    short          con_tbl;       /* presumably a connection-table index -- confirm */
    long           dat_index;
    char          *headword;
    short          headword_len;  /* presumably the length of headword -- confirm */
    char           is_undef;      /* presumably non-zero for an undefined word -- confirm */
    void          *darts;         /* untyped handle; its real type is not visible here */
};
typedef struct _mrph_t mrph_t;
/*
 * mrph_data_t -- a mrph_t reference together with additional string data
 * (reading, pronunciation, base, info) and their lengths.
 * Ownership of the pointed-to memory is not visible from this header.
 */
struct _mrph_data_t {
    mrph_t *mrph;         /* the morpheme record this data belongs to */
    short   stem_len;
    char   *reading;
    char   *pron;
    short   reading_len;  /* presumably the length of reading -- confirm */
    short   pron_len;     /* presumably the length of pron -- confirm */
    char   *base;
    char   *info;
    long    compound;
};
typedef struct _mrph_data_t mrph_data_t;
/*
 * path_t -- one path entry; the global Cha_path points at objects of
 * this type.
 */
struct _path_t {
    int    mrph_p;     /* presumably an index identifying the morpheme -- confirm */
    short  state;
    short  start;      /* presumably start/end positions in the input -- confirm */
    short  end;
    short  do_print;
    int    cost;
    int   *path;       /* NOTE(review): length/termination convention not visible here */
    int    best_path;
};
typedef struct _path_t path_t;
/*
 * cha_lat_t -- parsing state passed to the cha_parse_* and print routines.
 * It embeds a fixed CHA_INPUT_SIZE-byte text buffer (flagged "XXX" in the
 * original), so each object is large.
 */
struct _cha_lat_t {
    unsigned char text[CHA_INPUT_SIZE];  /* XXX */
    int           len;                   /* presumably the number of bytes used in text -- confirm */
    /*
     * Two members are disabled (commented out) in the original:
     *     path_t *lattice;
     *     int path_num;
     * NOTE(review): presumably superseded by the globals Cha_path and
     * Cha_path_num -- confirm.
     */
    int           anno;
    int           last_anno;

    /* for parse */
    int           offset;
    int           cursor;
    int           head_path;
    int           path_idx[PATH1_NUM];
};
typedef struct _cha_lat_t cha_lat_t;
/*
 * Kind of a cha_seg_t segment (stored in cha_seg_t.type).
 * The numeric values are spelled out; they match the implicit numbering
 * starting at 0.
 */
enum cha_segtype {
    SEGTYPE_NORMAL      = 0,
    SEGTYPE_UNSPECIFIED = 1,
    SEGTYPE_MORPH       = 2,
    SEGTYPE_ANNOTATION  = 3
};
/*
 * cha_seg_t -- one input segment handed to cha_parse_segment().
 * It embeds a CHA_INPUT_SIZE-byte char_type array (flagged "XXX" in the
 * original), so each object is large.
 */
typedef struct _cha_seg_t {
    unsigned char    *text;                       /* segment text; ownership not visible here */
    int               len;                        /* presumably the length of text -- confirm */
    char              char_type[CHA_INPUT_SIZE];  /* XXX */
    enum cha_segtype  type;
    char              is_undef;
    unsigned short    posid;
    unsigned char     inf_type;
    unsigned char     inf_form;
    int               anno_no;
} cha_seg_t;
/*
 * Information for annotation.
 * The global table Cha_anno_info[] holds UNDEF_HINSI_MAX of these.
 */
typedef struct _anno_info {
    int   hinsi;   /* "hinsi" is Japanese for part of speech; presumably a POS id -- confirm */
    char *str1;
    char *str2;
    int   len1;    /* presumably the length of str1 -- confirm */
    int   len2;    /* presumably the length of str2 -- confirm */
    char *format;
} anno_info;
/*
 * Information for unseen (undefined) words.
 * The global table Cha_undef_info[] holds UNDEF_HINSI_MAX of these.
 */
typedef struct _undef_info {
    int cost;
    int cost_step;
    int con_tbl;
    int hinsi;      /* "hinsi" is Japanese for part of speech; presumably a POS id -- confirm */
} undef_info;
/*
 * Opaque handle types: only the struct tags are declared in this header,
 * so code including it can hold pointers to these objects but cannot
 * access their members. They are used by the mmap.c and block.c
 * prototypes below.
 */
typedef struct _cha_mmap_t cha_mmap_t;
typedef struct _cha_block_t cha_block_t;
/*
* global variables
*/
/* Declarations only; storage for these objects is defined elsewhere. */
extern cha_block_t *Cha_mrph_block;

extern path_t      *Cha_path;
extern int          Cha_path_num;

extern int          Cha_con_cost_weight;
extern int          Cha_con_cost_undef;
extern int          Cha_mrph_cost_weight;
extern int          Cha_cost_width;

extern int          Space_pos_hinsi;

/* Both tables have UNDEF_HINSI_MAX entries. */
extern anno_info    Cha_anno_info[UNDEF_HINSI_MAX];
extern undef_info   Cha_undef_info[UNDEF_HINSI_MAX];
extern int          Cha_undef_info_num;

/* presumably the beginning-/end-of-sentence output strings -- confirm */
extern char        *Cha_bos_string;
extern char        *Cha_eos_string;

extern int          Cha_output_iscompound;
/*
* functions
*/
/*
 * init.c
 * Prototypes for routines the original comment attributes to init.c;
 * their definitions are not part of this header.
 * NOTE(review): FILE is used below but this header does not include
 * <stdio.h> itself; presumably it arrives via chadic.h/chasen.h -- confirm.
 */
/* Takes a stdio stream; presumably the rc (configuration) file to read -- confirm in init.c. */
void cha_read_rcfile_fp(FILE*);
/* No arguments, no result; presumably library initialisation -- confirm in init.c. */
void cha_init(void);
/*
 * print.c
 * Output-related prototypes; definitions are not part of this header.
 * The meaning of the unnamed int and char* parameters is not visible
 * here -- see print.c before relying on any interpretation.
 */
/* Returns a char pointer; ownership of the returned buffer is not visible here. */
char *cha_get_output(void);
/* Takes a stdio stream; presumably the destination for printed output -- confirm. */
void cha_set_output(FILE*);
void cha_print_reset(void);
/* Takes a cha_lat_t, an int, a mrph_data_t and a char*; the char* is presumably a format string -- confirm. */
void cha_printf_mrph(cha_lat_t*, int, mrph_data_t*, char*);
/* Takes a cha_lat_t, two ints and a char*; the char* is presumably a format string -- confirm. */
void cha_print_path(cha_lat_t*, int, int, char*);
void cha_print_bos_eos(int);
/* The three table printers take no arguments and return nothing. */
void cha_print_hinsi_table(void);
void cha_print_ctype_table(void);
void cha_print_cform_table(void);
/*
 * parse.c
 * Parsing entry points operating on a caller-supplied cha_lat_t.
 * Each returns int; the meaning of the return value is not visible in
 * this header -- check parse.c.
 */
int cha_parse_bos(cha_lat_t*);
int cha_parse_eos(cha_lat_t*);
/* Additionally takes the cha_seg_t segment to process. */
int cha_parse_segment(cha_lat_t*, cha_seg_t*);
/*
 * chalib.c
 * Prototypes for routines the original comment attributes to chalib.c;
 * definitions are not part of this header.
 */
/* Takes a stdio stream; presumably where the version text is written -- confirm. */
void cha_version(FILE*);
void cha_set_opt_form(char*);
void cha_set_cost_width(int);
void cha_set_language(char*);
/* Same parameter and return types as fgets(); whether the semantics match is not visible here. */
char *cha_fgets(char*, int, FILE*);
/* chasen_cell_t is not declared in this header; presumably it comes from chadic.h/chasen.h -- confirm. */
void cha_read_dadic(chasen_cell_t*);
/*
 * cha_jfgets.c
 * Line-input helpers; definitions are not part of this header.
 */
void cha_set_jfgets_delimiter(char*);
/* Both readers have the same parameter and return types as fgets(); exact semantics are not visible here. */
char *cha_fget_line(char*, int, FILE*);
char *cha_jfgets(char*, int, FILE*);
/*
 * Takes two unsigned char buffers and returns int. Going by the name it
 * presumably converts JIS-encoded text to EUC; which argument is input
 * and which is output is not visible here -- confirm in cha_jfgets.c.
 */
int cha_jistoeuc(unsigned char*, unsigned char*);
/*
 * mmap.c
 * Interface to the opaque cha_mmap_t handle; definitions are not part of
 * this header.
 * NOTE(review): off_t is used below but this header does not include
 * <sys/types.h> itself; presumably it arrives via chadic.h/chasen.h -- confirm.
 */
/* Both take a char* (presumably a file name) and return a cha_mmap_t handle; "_w" presumably means writable -- confirm. */
cha_mmap_t *cha_mmap_file(char*);
cha_mmap_t *cha_mmap_file_w(char*);
/* Takes a handle and returns nothing; presumably releases it -- confirm. */
void cha_munmap_file(cha_mmap_t*);
/* Accessors on a handle: an untyped pointer and an off_t value. */
void *cha_mmap_map(cha_mmap_t*);
off_t cha_mmap_size(cha_mmap_t*);
/*
 * block.c
 * Interface to the opaque cha_block_t container; definitions are not part
 * of this header. Items are handled as untyped (void *) pointers.
 * NOTE(review): size_t is used below but this header does not include
 * <stddef.h> itself; presumably it arrives via chadic.h/chasen.h -- confirm.
 */
/* Takes a size_t and an int; presumably the item size and an item count -- confirm in block.c. */
cha_block_t *cha_block_new(size_t, int);
void cha_block_delete(cha_block_t*);
/* Each returns an untyped item pointer; failure behaviour is not visible here. */
void *cha_block_new_item(cha_block_t*);
/* The int is presumably an item index -- confirm. */
void *cha_block_get_item(cha_block_t*, int);
void *cha_block_pop(cha_block_t*);
/* Returns int; presumably the number of items held -- confirm. */
int cha_block_num(cha_block_t*);
void cha_block_clear(cha_block_t*);
#endif /* __CHALIB_H__ */