File: LanguageModel.h

package info (click to toggle)
dasher 4.11%2Bgit20130508.adc653-2
  • links: PTS, VCS
  • area: main
  • in suites: jessie, jessie-kfreebsd
  • size: 40,248 kB
  • ctags: 5,158
  • sloc: xml: 185,479; cpp: 32,301; sh: 11,207; makefile: 828; ansic: 483
file content (159 lines) | stat: -rw-r--r-- 3,518 bytes parent folder | download | duplicates (6)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
// LanguageModel.h
//
/////////////////////////////////////////////////////////////////////////////
//
// Copyright (c) 2001-2005 David Ward
//
/////////////////////////////////////////////////////////////////////////////

#ifndef __LanguageModelling_LanguageModel_h__
#define __LanguageModelling_LanguageModel_h__

#include "../DasherTypes.h"

#include <cstddef>
#include <string>
#include <vector>

/////////////////////////////////////////////////////////////////////////////

namespace Dasher {
  class CLanguageModel;
}

///
/// \defgroup LM Language modelling
/// @{

///
/// \brief Language model base class
///
/// Abstract base class for all language model components. Concrete models
/// implement the context-manipulation and prediction functions; callers refer
/// to model state through opaque Context handles.
///

class Dasher::CLanguageModel
{
public:

  /////////////////////////////////////////////////////////////////////////////

  ///
  /// Construct a model making predictions over iNumSyms symbols.
  /// Marked explicit so that an int is never silently treated as a model.
  ///

  explicit CLanguageModel(int iNumSyms) : m_iNumSyms(iNumSyms) {}

  /// Virtual so that concrete models can be destroyed through a base pointer.
  virtual ~CLanguageModel() {}

  ///
  /// Index of registered context
  ///

  typedef std::size_t Context;

  ///
  /// Representation of an invalid context
  ///
  /// NOTE(review): only an in-class initialiser is visible here; binding this
  /// constant to a reference or taking its address needs an out-of-class
  /// definition - confirm one exists if it is ever used that way.
  ///

  static const Context nullContext = 0;

  ///
  /// @name Context manipulation
  /// Functions for creating, destroying and altering contexts
  /// @{

  ///
  /// Create an empty context
  /// \return Handle of the newly created context
  ///

  virtual Context CreateEmptyContext() = 0;

  ///
  /// Create a copy of an existing context
  /// \param Context Handle of the context to copy
  /// \return Handle of the copy
  ///

  virtual Context CloneContext(Context Context) = 0;

  ///
  /// Free resources associated with a context
  /// \param Context Handle of the context to release
  ///

  virtual void ReleaseContext(Context Context) = 0;

  ///
  /// Update context with a character - only modifies context
  /// \param context Handle of the context to update
  /// \param Symbol Symbol to append to the context
  ///

  virtual void EnterSymbol(Context context, int Symbol) = 0;

  ///
  /// Add character to the language model at the current context and update the context
  /// - modifies both the context and the LanguageModel
  /// \param context Handle of the context to learn in and update
  /// \param Symbol Symbol to learn
  ///

  virtual void LearnSymbol(Context context, int Symbol) = 0;

  /// @}

  /// @name Prediction
  /// Determination of probabilities in a given context
  /// @{

  ///
  /// Get symbol probability distribution
  /// \param Context Handle of the context to predict in
  /// \param Probs Output vector receiving the distribution
  /// \param iNorm presumably the total the probabilities are normalised to -
  ///        confirm against the concrete models
  /// \param iUniform presumably the share of iNorm spread uniformly over all
  ///        symbols - confirm against the concrete models
  ///

  virtual void GetProbs(Context Context, std::vector < unsigned int >&Probs, int iNorm, int iUniform) const = 0;

  /// @}

  /// @name Persistent storage
  /// Binary representation of language model state
  /// @{

  ///
  /// Write the model state to a file.
  /// The base implementation ignores strFilename, writes nothing and
  /// returns false.
  ///

  virtual bool WriteToFile(std::string strFilename) {
    return false;
  }

  ///
  /// Read the model state from a file.
  /// The base implementation ignores strFilename, reads nothing and
  /// returns false.
  ///

  virtual bool ReadFromFile(std::string strFilename) {
    return false;
  }

  /// @}

  ///
  /// Get the maximum useful context length for this language model
  ///

  virtual int GetContextLength() const {
    // TODO: Fix hard coded value
    return 5;
  }

 protected:
  ///
  /// Fixed-size leading part of a language model file header.
  /// Field order and types are kept exactly as they were: this struct is
  /// described below as a binary header, so its layout must not change.
  ///
  struct SLMFileHeader {
    // Magic number ("%DLF" in ASCII)
    char szMagic[4];
    // Version of the header
    unsigned short int iHeaderVersion;
    // Total size of header (including variable length alphabet name)
    unsigned short int iHeaderSize;
    // ID of the language model
    unsigned short int iLMID;
    // Version number of the language model
    unsigned short int iLMVersion;
    // Minimum compatible version for the language model
    unsigned short int iLMMinVersion;
    // Number of characters in the alphabet
    unsigned short int iAlphabetSize;
    // UTF-8 encoded alphabet name follows (variable length struct)
  };

  ///Return the number of symbols over which we are making predictions, plus one
  /// (to leave space for an initial 0).
  int GetSize() const {
    return m_iNumSyms+1;
  }

  /// Number of symbols passed to the constructor; fixed for the model's lifetime.
  const int m_iNumSyms;

};

/// @}

/////////////////////////////////////////////////////////////////////////////

#endif // ndef __LanguageModelling_LanguageModel_h__