File: libstardict.h

package info (click to toggle)
sdcv 0.3.4-1
  • links: PTS
  • area: main
  • in suites: sarge
  • size: 2,644 kB
  • ctags: 1,207
  • sloc: sh: 9,235; ansic: 7,803; cpp: 2,613; makefile: 485; yacc: 316; sed: 16
file content (179 lines) | stat: -rw-r--r-- 5,480 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
#ifndef __SD_LIB_H__
#define __SD_LIB_H__

#ifdef HAVE_CONFIG_H
#  include "config.h"
#endif

#include <vector>
#include <string>
#include <cstdio>

#include <glib.h>

#include "dictziplib.h"
#include "mapfile.h"
#include "lib_utils.h"


struct cacheItem{
  glong offset;
  gchar *data;
  inline cacheItem(void) : data(NULL){}
  inline ~cacheItem(){g_free(data);}
};

// Number of cacheItem slots in DictBase::cache.
const int WORDDATA_CACHE_NUM = 10;

// Sentinel word index: Library::poGetWordData() returns NULL when passed this value.
const int INVALID_INDEX = -100;

// Plain description of one dictionary. DictBase::get_dict_info() takes a
// reference to one of these together with an .ifo file name.
// NOTE(review): the field names look like the keys of a StarDict .ifo file;
// confirm against the implementation of get_dict_info().
// NOTE: wordcount and idxfilesize have no initializer here, so a
// default-constructed TDictInfo leaves them indeterminate.
struct TDictInfo{
  std::string ifofilename;       // presumably the path of the .ifo file described -- confirm
  glong wordcount;
  std::string bookname;
  std::string author;
  std::string email;
  std::string website;
  std::string date;
  std::string description;
  gulong idxfilesize;
  std::string sametypesequence;
};

class DictBase{
public:
  DictBase(void);
  ~DictBase();
  char * GetWordData(glong idxitem_offset, glong idxitem_size);
  static bool get_dict_info(const char *ifofilename, TDictInfo & dict_info, 
			    bool istreedict=false);
protected:
  char *sametypesequence;
  FILE *dictfile;
  dictData *dictdzfile;
private:
  struct cacheItem cache[WORDDATA_CACHE_NUM];
  gint cache_cur;	
};


struct TSeqBundle{
  gunichar val;
  gunichar dif;
  TSeqBundle(void){}
  TSeqBundle(gunichar v, gunichar d) : val(v), dif(d){}
};

// A list of dictionary descriptions; passed by reference to
// Library::load_dict_info_list_from_dir() and Library::get_dict_info_list().
typedef std::vector<TDictInfo> TDictInfoList;

// A single dictionary built on top of DictBase. Besides the data handles
// inherited from DictBase it keeps the word index (idxfile, wordlist /
// wordoffset, map_wordoffset) and offers lookup by exact word and by glob
// pattern.
class Dictionary : public DictBase {
private:
  glong wordcount;  // value reported by length()
  char *bookname;   // value reported by GetBookname()
  FILE *idxfile;

  // Only one member of each anonymous union is meaningful at a time.
  // NOTE(review): which one is active presumably depends on how the index
  // was loaded (loadwordlist vs. loadwordoffset) -- confirm.
  union {
    gchar **wordlist;
    glong *wordoffset;
  };
  bool need_free_wordoffset;
  union {
    gchar *idxdatabuffer;
    glong cur_wordindex;
  };

  TMapFile map_wordoffset;
  std::vector<glong> alphabet_index;
  std::vector<TSeqBundle> bundle_list;

  // The length of "word_str" should be less than 256.
  // See src/tools/DICTFILE_FORMAT.
  gchar wordentry_buf[256];
  glong wordentry_offset;
  glong wordentry_size;
  const gchar *cache_dir;
  gchar *m_ifofilename;  // value reported by get_ifofilename()

  bool load_ifofile(const char *ifofilename, gulong *idxfilesize);
  void loadwordlist();
  bool loadwordoffset(const char *idxfilename, gulong idxfilesize);

public:
  explicit Dictionary(const char *_cache_dir);
  ~Dictionary();

  // Loads the dictionary described by the given .ifo file; returns a
  // success flag.
  bool load(const char *ifofilename);

  // Returns the stored word count.
  glong length() const { return wordcount; }

  // Returns the stored book name pointer (not a copy).
  char *GetBookname() const { return bookname; }

  bool Lookup(const char *sWord, glong *pIndex);
  bool LookupWithRule(GPatternSpec *pspec, glong *aIndex, gint iBuffLen);
  char *GetWord(glong index);

  // Note: this single-argument overload hides
  // DictBase::GetWordData(glong, glong) for callers using a Dictionary.
  char *GetWordData(glong index);

  // Returns the stored .ifo file name pointer (not a copy).
  const char *get_ifofilename() const { return m_ifofilename; }
};

//============================================================================
// struct
// One candidate produced by a fuzzy lookup (see Library::LookupWithFuzzy):
// a word plus its distance value.
struct Fuzzystruct {
  char *pMatchWord;        // the candidate word
  int iMatchWordDistance;  // presumably its edit distance from the searched word -- confirm
};

// Type of the optional progress callback accepted by the Library lookup
// functions: takes no arguments and returns nothing.
typedef void TProgressFunc();

// Limits for fuzzy and pattern lookups.
// NOTE(review): apart from MAX_FUZZY_DISTANCE, the exact use of each limit is
// not visible in this header -- confirm in the Library implementation.
const int MAX_FUZZY_MATCH_ITEM = 100;
// When searching for similar words, at most MAX_FUZZY_DISTANCE - 1
// differences are allowed.
const int MAX_FUZZY_DISTANCE = 3;
const int MAX_FLOAT_WINDOW_FUZZY_MATCH_ITEM = 5;
const int MAX_MATCH_ITEM_PER_LIB = 100;

class Library{
private:
  std::vector<Dictionary*> oLib; // word library.
 
  int iMaxFuzzyDistance;
  char *cache_dir;
  
  static int FuzzystructCompare(const void * s1, const void * s2);
  static int MatchWordCompare(const void * s1, const void * s2);

  void LoadDir(char *dirname, const GSList *order_list, const GSList *disable_list);
  bool SimpleLookupWord(const char* sWord, glong& iWordIndex, int iLib);
  inline bool LookupWord(const char* sWord, glong & iWordIndex, int iLib){
    return oLib[iLib]->Lookup(sWord, &iWordIndex);
  }
  bool LookupSimilarWord(const char* sWord, glong & iWordIndex, int iLib);
  inline bool LookdupWordsWithRule(GPatternSpec *pspec,glong* aiIndexes, 
				   int iLen, int iLib){  
    return oLib[iLib]->LookupWithRule(pspec,aiIndexes,iLen);
  }
public:
  explicit Library(char *_cache_dir);
  ~Library();
  void Load(const GSList *order_list, const GSList  *disable_list, const gchar *data_dir=NULL);
  void ReLoad(const GSList *order_list, const GSList *disable_list, const gchar *data_dir=NULL);
  inline glong iLength(int iLib) const{return (oLib[iLib])->length();} 
  inline const char* GetBookname(int iLib) const{
    return (oLib[iLib])->GetBookname();
  }
  inline gint total_libs(void) const{return oLib.size();}
  inline const char* get_ifofilename(gint iDict) const{
    return oLib[iDict]->get_ifofilename();
  }
  inline char *poGetWord(glong iIndex, int iLib){
    return( oLib[iLib]->GetWord(iIndex) );
  }
  inline char *poGetWordData(glong iIndex,int iLib){
    if(iIndex==INVALID_INDEX)
      return NULL;
    return oLib[iLib]->GetWordData(iIndex);
  }
 
  char *poGetCurrentWord(glong * iCurrent);
  char *poGetNextWord(const gchar *word, glong * iCurrent);
  char *poGetPreWord(glong * iCurrent);

  bool SimpleLookup(const char* sWord, char ** & ppWord, 
		    char ** & ppWordData, char * & SearchWord);
  bool SimpleLookup(const char* sWord, glong* piIndex,
		    char ** & ppWord, char ** & ppWordData,
		    bool piIndexValid, 
		    bool bTryMoreIfNotFound);

  bool LookupWithFuzzy(const char *sWord, Fuzzystruct * & oFuzzystruct, TProgressFunc progress_func=NULL);
  bool LookupWithFuzzy(const char *sWord, char *** & pppWord, char *** & pppWordData, char ** & ppOriginWord, gint & count, TProgressFunc progress_func=NULL);
  gint LookupWithRule(const char *word, char ** & ppMatchWord, TProgressFunc progress_func=NULL);
  static void load_dict_info_list_from_dir(const char *dir_name, TDictInfoList & dict_info_list);
  static void get_dict_info_list(TDictInfoList & dict_info_list, const char *data_dir=NULL);
};


#endif