File: DictLanguageModel.h

package info (click to toggle)
dasher 4.11%2Bgit20130508.adc653-2
  • links: PTS, VCS
  • area: main
  • in suites: jessie, jessie-kfreebsd
  • size: 40,248 kB
  • ctags: 5,158
  • sloc: xml: 185,479; cpp: 32,301; sh: 11,207; makefile: 828; ansic: 483
file content (155 lines) | stat: -rw-r--r-- 4,449 bytes parent folder | download | duplicates (6)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
// DictLanguageModel.h
//
/////////////////////////////////////////////////////////////////////////////
//
// Copyright (c) 1999-2004 David Ward
//
/////////////////////////////////////////////////////////////////////////////

#ifndef __DictLanguageModel_h__
#define __DictLanguageModel_h__

#include "../../Common/NoClones.h"
#include "../../Common/Allocators/PooledAlloc.h"
#include "PPMLanguageModel.h"
#include "../Alphabet/AlphInfo.h"
#include "../Alphabet/AlphabetMap.h"
#include <vector>
#include <map>
#include <string>
#include <stdio.h>

//static char dumpTrieStr[40000];
//const int maxcont =200;

namespace Dasher {
  /// \ingroup LM
  /// \{
  /// Dictionary-based language model.
  ///
  /// Derives publicly from CLanguageModel. Its state is a structure of
  /// CDictnode objects reachable from m_pRoot plus a word dictionary (dict).
  /// The Context values exchanged with callers are pointers to CDictContext
  /// objects taken from m_ContextAlloc and cast to the opaque Context type.
  class CDictLanguageModel:public CLanguageModel, protected CSettingsUser {
  public:
    /// Construct the model for the given alphabet description and alphabet map.
    /// (Defined in the .cpp file.)
    CDictLanguageModel(CSettingsUser *pCreator, const CAlphInfo *pAlph, const CAlphabetMap *pAlphMap);
    virtual ~CDictLanguageModel();

    /// Allocate a new context initialised as a copy of the root context.
    Context CreateEmptyContext();
    /// Hand a context back to the context allocator.
    void ReleaseContext(Context context);
    /// Allocate a new context that is a copy of \a context.
    Context CloneContext(Context context);

    /// Fill \a Probs for the given context.
    /// NOTE(review): iNorm / iUniform presumably are the normalisation total
    /// and the uniform share, as in the other language models - confirm
    /// against CLanguageModel.
    virtual void GetProbs(Context context, std::vector < unsigned int >&Probs, int iNorm, int iUniform) const;

    /// Advance \a context by \a Symbol (defined in the .cpp file).
    virtual void EnterSymbol(Context context, int Symbol);

    /// This model never learns through the public interface: LearnSymbol()
    /// simply forwards to EnterSymbol().
    virtual void LearnSymbol(Context context, int Symbol) {
      EnterSymbol(context, Symbol);
    }

  private:

    /// Internal learning routine (defined in the .cpp file); deliberately not
    /// reachable through LearnSymbol().
    void MyLearnSymbol(Context context, int Symbol);

    /// One node of the model's node structure.
    class CDictnode {
    public:
      /// Look up \a sym starting from this node (defined in the .cpp file).
      CDictnode * find_symbol(int sym) const;
      // NOTE(review): child / next / vine presumably are first child, next
      // sibling and the PPM-style "vine" link - confirm in the .cpp file.
      CDictnode *child;
      CDictnode *next;
      CDictnode *vine;
      unsigned short int count;   ///< Starts at 1 in both constructors.
      int sbl;                    ///< Symbol stored in this node.

      CDictnode(int sym);
      CDictnode();
    };

    /// Per-context state handed out to callers as an opaque Context.
    ///
    /// No user-written copy constructor or destructor: the members are plain
    /// pointers, ints and a std::string, so the compiler-generated memberwise
    /// copy, copy-assignment and destruction are exactly what is required.
    class CDictContext {
    public:
      // FIXME - doesn't work if we're trying to create a non-empty context
      CDictContext(CDictnode * _head = 0, int _order = 0):head(_head), order(_order), word_head(_head), word_order(0) {
      }
      void dump();
      CDictnode *head;
      int order;

      std::string current_word;
      CDictnode *word_head;
      int word_order;

    };

    const CAlphabetMap *m_pAlphMap;
    const int m_iSpaceSymbol;   // const: has to be set in the constructor's initialiser list

    CDictnode *AddSymbolToNode(CDictnode * pNode, symbol sym, int *update);

    void AddSymbol(CDictContext & context, symbol sym);

    void CollapseContext(CDictContext & context) const;

    // NOTE(review): the non-const variant presumably may add unknown words
    // (using nextid) while the const variant only looks them up - confirm.
    int lookup_word(const std::string & w);
    int lookup_word_const(const std::string & w) const;

    CDictContext *m_rootcontext;  // Template copied into every context made by CreateEmptyContext()
    CDictnode *m_pRoot;

    std::map < std::string, int >dict;  // Dictionary
    int nextid;

    int NodesAllocated;

    int max_order;

    // mutable so that const member functions can still allocate nodes.
    mutable CSimplePooledAlloc < CDictnode > m_NodeAlloc;
    CPooledAlloc < CDictContext > m_ContextAlloc;
  };
  /// \}

////////////////////////////////////////////////////////////////////////
// Inline functions 
////////////////////////////////////////////////////////////////////////

////////////////////////////////////////////////////////////////////////

  /// Build a node that stores symbol \a sym. The three link pointers start
  /// out null and the count starts at one.
  inline Dasher::CDictLanguageModel::CDictnode::CDictnode(symbol sym)
    : child(0), next(0), vine(0), count(1), sbl(sym) {
  }

////////////////////////////////////////////////////////////////////////

  /// Default constructor: all three link pointers are null and the count
  /// starts at one.
  ///
  /// sbl is explicitly set to 0 so the node never carries an indeterminate
  /// symbol; leaving it uninitialised made any read of sbl before an explicit
  /// assignment undefined behaviour.
  inline CDictLanguageModel::CDictnode::CDictnode()
    : child(0), next(0), vine(0), count(1), sbl(0) {
  }

///////////////////////////////////////////////////////////////////

  /// Take a context object from the context allocator, overwrite it with a
  /// copy of the root context and hand it out as an opaque Context.
  inline CLanguageModel::Context CDictLanguageModel::CreateEmptyContext() {
    CDictContext &fresh = *m_ContextAlloc.Alloc();
    fresh = *m_rootcontext;
    return (Context) &fresh;
  }

///////////////////////////////////////////////////////////////////

  /// Take a context object from the context allocator and make it a
  /// memberwise copy of the context identified by \a Copy.
  inline CLanguageModel::Context CDictLanguageModel::CloneContext(Context Copy) {
    CDictContext &duplicate = *m_ContextAlloc.Alloc();
    const CDictContext &source = *(CDictContext *) Copy;
    duplicate = source;
    return (Context) &duplicate;
  }

///////////////////////////////////////////////////////////////////

  /// Give the context identified by \a release back to the context allocator.
  inline void CDictLanguageModel::ReleaseContext(Context release) {
    CDictContext *const pFinished = (CDictContext *) release;
    m_ContextAlloc.Free(pFinished);
  }

///////////////////////////////////////////////////////////////////

}                               // end namespace Dasher

#endif /* #ifndef __DictLanguageModel_h__ */