1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155
|
// DictLanguageModel.h
//
/////////////////////////////////////////////////////////////////////////////
//
// Copyright (c) 1999-2004 David Ward
//
/////////////////////////////////////////////////////////////////////////////
#ifndef __DictLanguageModel_h__
#define __DictLanguageModel_h__
#include "../../Common/NoClones.h"
#include "../../Common/Allocators/PooledAlloc.h"
#include "PPMLanguageModel.h"
#include "../Alphabet/AlphInfo.h"
#include "../Alphabet/AlphabetMap.h"
#include <vector>
#include <map>
#include <string>
#include <stdio.h>
//static char dumpTrieStr[40000];
//const int maxcont =200;
namespace Dasher {
/// \ingroup LM
/// \{
/// Language model that keeps a word dictionary (`dict`) alongside a structure
/// of linked CDictnode objects rooted at `m_pRoot`.
///
/// Contexts are handed to callers as opaque `Context` handles; internally each
/// handle is a pointer to a CDictContext taken from `m_ContextAlloc`.
class CDictLanguageModel : public CLanguageModel, protected CSettingsUser {
public:
  CDictLanguageModel(CSettingsUser *pCreator, const CAlphInfo *pAlph, const CAlphabetMap *pAlphMap);
  virtual ~CDictLanguageModel();

  // Context lifetime management (defined inline below the class).
  Context CreateEmptyContext();
  void ReleaseContext(Context context);
  Context CloneContext(Context context);

  virtual void GetProbs(Context Context, std::vector<unsigned int> &Probs, int iNorm, int iUniform) const;
  virtual void EnterSymbol(Context context, int Symbol);

  /// This model never learns: the symbol is only entered into the context.
  virtual void LearnSymbol(Context context, int Symbol) {
    EnterSymbol(context, Symbol);
  }

private:
  /// One node of the model's linked structure.
  class CDictnode {
  public:
    CDictnode(int sym);
    CDictnode();

    CDictnode *find_symbol(int sym) const;

    CDictnode *child;
    CDictnode *next;
    CDictnode *vine;
    unsigned short int count;
    int sbl;   // symbol stored in this node
  };

  /// State behind a `Context` handle.
  class CDictContext {
  public:
    /// Both `head` and `word_head` start at \a pHead; `word_order` starts at 0
    /// and `current_word` starts empty.
    // FIXME - doesn't work if we're trying to create a non-empty context
    CDictContext(CDictnode *pHead = 0, int iOrder = 0)
        : head(pHead), order(iOrder), word_head(pHead), word_order(0) {
    }

    /// Member-wise copy of \a input.
    CDictContext(const CDictContext &input)
        : head(input.head),
          order(input.order),
          current_word(input.current_word),
          word_head(input.word_head),
          word_order(input.word_order) {
    }

    ~CDictContext() {
    }

    void dump();

    CDictnode *head;
    int order;
    std::string current_word;
    CDictnode *word_head;
    int word_order;
  };

  void MyLearnSymbol(Context context, int Symbol);

  const CAlphabetMap *m_pAlphMap;
  const int m_iSpaceSymbol;

  CDictnode *AddSymbolToNode(CDictnode *pNode, symbol sym, int *update);
  void AddSymbol(CDictContext &context, symbol sym);
  void CollapseContext(CDictContext &context) const;

  int lookup_word(const std::string &w);
  int lookup_word_const(const std::string &w) const;

  CDictContext *m_rootcontext;   // copied into every context made by CreateEmptyContext()
  CDictnode *m_pRoot;

  std::map<std::string, int> dict;   // Dictionary

  int nextid;
  int NodesAllocated;
  int max_order;

  // Pools from which nodes and contexts are allocated.
  mutable CSimplePooledAlloc<CDictnode> m_NodeAlloc;
  CPooledAlloc<CDictContext> m_ContextAlloc;
};
/// \}
////////////////////////////////////////////////////////////////////////
// Inline functions
////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////
/// Build a node holding symbol \a sym: the child, next and vine links are all
/// null and the count starts at 1.
inline Dasher::CDictLanguageModel::CDictnode::CDictnode(symbol sym)
    : child(0), next(0), vine(0), count(1), sbl(sym) {
}
////////////////////////////////////////////////////////////////////////
/// Build a node with no symbol supplied: the child, next and vine links are
/// all null and the count starts at 1.
///
/// `sbl` is set to 0 so that it never holds an indeterminate value; reading an
/// uninitialized int is undefined behaviour. Code that needs a particular
/// symbol in the node is expected to assign `sbl` after construction.
inline CDictLanguageModel::CDictnode::CDictnode()
    : child(0), next(0), vine(0), count(1), sbl(0) {
}
///////////////////////////////////////////////////////////////////
/// Take a context object from the context pool and make it a copy of the
/// root context.
/// \return the new context, as a `Context` handle wrapping the object's pointer.
inline CLanguageModel::Context CDictLanguageModel::CreateEmptyContext() {
  CDictContext *const pFresh = m_ContextAlloc.Alloc();
  *pFresh = *m_rootcontext;   // start from the root context's state
  return (Context) pFresh;
}
///////////////////////////////////////////////////////////////////
/// Duplicate an existing context into a newly pooled context object.
/// \param Copy handle of the context to duplicate; it is interpreted as a
///             CDictContext pointer and left unmodified.
/// \return handle of the duplicate.
inline CLanguageModel::Context CDictLanguageModel::CloneContext(Context Copy) {
  const CDictContext *const pSource = (const CDictContext *) Copy;
  CDictContext *const pDuplicate = m_ContextAlloc.Alloc();
  *pDuplicate = *pSource;
  return (Context) pDuplicate;
}
///////////////////////////////////////////////////////////////////
/// Hand a context back to the context pool.
/// \param release handle of the context to free; it is interpreted as a
///                CDictContext pointer.
inline void CDictLanguageModel::ReleaseContext(Context release) {
  CDictContext *const pDone = (CDictContext *) release;
  m_ContextAlloc.Free(pDone);
}
///////////////////////////////////////////////////////////////////
} // end namespace Dasher
#endif /* #ifndef __DictLanguageModel_h__ */
|