File: RoutingAlphMgr.cpp

package info (click to toggle)
dasher 4.11%2Bgit20130508.adc653-2
  • links: PTS, VCS
  • area: main
  • in suites: jessie, jessie-kfreebsd
  • size: 40,248 kB
  • ctags: 5,158
  • sloc: xml: 185,479; cpp: 32,301; sh: 11,207; makefile: 828; ansic: 483
file content (193 lines) | stat: -rw-r--r-- 8,159 bytes parent folder | download | duplicates (6)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
//
//  RoutingAlphMgr.cpp
//  Dasher
//
//  Created by Alan Lawrence on 13/12/11.
//  Copyright 2011 Cambridge University. All rights reserved.
//

#include "RoutingAlphMgr.h"
#include "DasherInterfaceBase.h"
using namespace std;
using namespace Dasher;

// Track memory leaks on Windows to the line that new'd the memory
// (only active when both _WIN32 and _DEBUG_MEMLEAKS are defined).
#ifdef _WIN32
#ifdef _DEBUG_MEMLEAKS
// Every later use of `new` in this file expands to a placement form that
// additionally passes _NORMAL_BLOCK, this file's name and the current line.
#define DEBUG_NEW new( _NORMAL_BLOCK, THIS_FILE, __LINE__ )
#define new DEBUG_NEW
// Remove any earlier THIS_FILE macro so the identifier can name the array below.
#undef THIS_FILE
static char THIS_FILE[] = __FILE__;
#endif
#endif

// Constructs a routing alphabet manager. All four arguments are forwarded
// unchanged to the CAlphabetManager base-class constructor.
CRoutingAlphMgr::CRoutingAlphMgr(CSettingsUser *pCreator, CDasherInterfaceBase *pInterface, CNodeCreationManager *pNCManager, const CAlphInfo *pAlphabet)
: CAlphabetManager(pCreator, pInterface, pNCManager, pAlphabet) {
  
  // This manager is only meant for alphabets with conversion ID 3 or 4.
  // (CreateLanguageModel passes "ID is 4" as the final flag of the language
  // model constructor; the meaning of that flag is defined by the model.)
  DASHER_ASSERT(pAlphabet->m_iConversionID==3 || pAlphabet->m_iConversionID==4);
}

void CRoutingAlphMgr::InitMap() {
  m_vBaseSyms.reserve(m_pAlphabet->iEnd); m_vBaseSyms.push_back(0); //base for unknown route = unknown!
  m_vRoutes.push_back(set<symbol>()); //unknown base symbol has no routes
  for (int i=1; i<m_pAlphabet->iEnd; i++) {
    symbol s = m_map.Get(m_pAlphabet->GetText(i));
    if (s==0) {
      s=m_vRoutes.size();
      m_vRoutes.push_back(set<symbol>());
      m_map.Add(m_pAlphabet->GetText(i),s);
    }
    m_vBaseSyms.push_back(s);
    m_vRoutes[s].insert(i);
  }
  m_vGroupsByRoute.resize(m_vBaseSyms.size());
  vector<const SGroupInfo *> vGroups;
  DASHER_ASSERT(!m_pAlphabet->pNext);
  vGroups.push_back(m_pAlphabet->pChild);
  while (!vGroups.empty()) {
    const SGroupInfo *g(vGroups.back()); vGroups.pop_back();
    if (!g) continue;
    for (int i=g->iStart; i<g->iEnd; i++) m_vGroupsByRoute[i]=g;
    vGroups.push_back(g->pNext);
    vGroups.push_back(g->pChild);
  }
}

void CRoutingAlphMgr::CreateLanguageModel() {
  m_pLanguageModel = new CRoutingPPMLanguageModel(this, &m_vBaseSyms, &m_vRoutes, m_pAlphabet->m_iConversionID==4);
}

// Returns the text to write to a training file for this node.
// This is CSymbolNode::trainText(), prefixed by
//   <train-start delimiter> <group name> <train-stop delimiter>
// when the base symbol does not have exactly one route and this route lies in
// a group; otherwise the plain text is returned.
string CRoutingAlphMgr::CRoutedSym::trainText() {
  const symbol iBase = mgr()->m_vBaseSyms[iSymbol];
  const set<symbol> &siblings = mgr()->m_vRoutes[iBase];
  DASHER_ASSERT(siblings.count(iSymbol));

  string strPlain = CSymbolNode::trainText();

  // Exactly one route for this base symbol: no annotation needed.
  if (siblings.size()==1)
    return strPlain;

  // No group recorded for this route: nothing to annotate with.
  const SGroupInfo *pGroup = mgr()->m_vGroupsByRoute[iSymbol];
  if (!pGroup)
    return strPlain;

  return mgr()->m_pAlphabet->m_strConversionTrainStart + pGroup->strName + mgr()->m_pAlphabet->m_strConversionTrainStop + strPlain;
}

// Constructs a symbol node for a specific route. All arguments are forwarded
// unchanged to the CSymbolNode constructor; no additional state is set up here.
//   iOffset - passed to CSymbolNode as the node's offset
//   pLabel  - screen label for the node
//   pMgr    - the routing alphabet manager creating this node
//   iSymbol - the route (alphabet symbol) this node represents
CRoutingAlphMgr::CRoutedSym::CRoutedSym(int iOffset, CDasherScreen::Label *pLabel, CRoutingAlphMgr *pMgr, symbol iSymbol)
: CSymbolNode(iOffset, pLabel, pMgr, iSymbol) {
}


// Creates a root node for a symbol found at the end of the given context.
// The incoming `sym` is a base symbol (it came from the alphabet map), so it
// is replaced by the route the language model reports as best for `ctx`, and
// the node is built for that route.
// TODO: the completely-empty context has no symbol at its end; that case is
// not treated specially here.
CAlphabetManager::CAlphNode *CRoutingAlphMgr::CreateSymbolRoot(int iOffset, CLanguageModel::Context ctx, symbol sym) {
  CRoutingPPMLanguageModel *pRoutingLM = static_cast<CRoutingPPMLanguageModel*>(m_pLanguageModel);
  sym = pRoutingLM->GetBestRoute(ctx);
  return new CRoutedSym(iOffset, m_vLabels[sym], this, sym);
}

// Returns the colour index for a node showing `route` at text offset `iOffset`.
//  - If the alphabet specifies a colour (anything other than -1) it is used;
//    on even offsets a value below 130 is shifted up by 130.
//  - Otherwise a built-in colour is chosen from a 2x3 table, indexed by the
//    parity of the offset and by route modulo 3.
int CRoutingAlphMgr::GetColour(symbol route, int iOffset) const {
  const bool bOddOffset = (iOffset&1)!=0;
  const int iAlphColour = m_pAlphabet->GetColour(route);

  if (iAlphColour!=-1) {
    const bool bShiftUp = !bOddOffset && iAlphColour<130;
    return bShiftUp ? iAlphColour+130 : iAlphColour;
  }

  // None specified in the alphabet: fall back to the built-in table.
  static const int aiFallback[2][3] = {
    // even offsets
    {66,  // light blue
     64,  // very light green
     62}, // light yellow
    // odd offsets
    {78,  // light purple
     81,  // brownish
     60}, // red
  };
  return aiFallback[bOddOffset ? 1 : 0][route % 3];
}


// Creates the child node for route `iSymbol` beneath `pParent`.
// The new node's offset is the parent's plus one, or plus two when the
// symbol's text is "\r\n". Its language-model context is a clone of the
// parent's context with the symbol entered into it.
CDasherNode *CRoutingAlphMgr::CreateSymbolNode(CAlphNode *pParent, symbol iSymbol) {
  const int iParentOffset = pParent->offset();
  const bool bIsCRLF = (m_pAlphabet->GetText(iSymbol)=="\r\n");
  CSymbolNode *pNode = new CRoutedSym(iParentOffset + (bIsCRLF ? 2 : 1), m_vLabels[iSymbol], this, iSymbol);

  pNode->iContext = m_pLanguageModel->CloneContext(pParent->iContext);

  // Enter only the BASE symbol into the LM, not the route number (which
  // would be out of range for it). EnterSymbol can't be changed to take
  // route numbers, because it is also handed base symbols that come from
  // the alphabet map.
  m_pLanguageModel->EnterSymbol(pNode->iContext, m_vBaseSyms[iSymbol]);

  return pNode;
}

// Constructs a trainer bound to the given routing alphabet manager.
// The CTrainer base receives the manager's language model, alphabet info and
// alphabet map. m_iStartSym is set to the symbol of the training-start
// delimiter when that delimiter maps to exactly one symbol; otherwise it
// stays 0 and a warning is sent to pMsgs.
CRoutingAlphMgr::CRoutingTrainer::CRoutingTrainer(CMessageDisplay *pMsgs, CRoutingAlphMgr *pMgr)
: CTrainer(pMsgs, pMgr->m_pLanguageModel, pMgr->m_pAlphabet, &pMgr->m_map), m_pMgr(pMgr) {

  m_iStartSym = 0;

  vector<symbol> vDelimSyms;
  m_pAlphabet->GetSymbols(vDelimSyms, m_pInfo->m_strConversionTrainStart);

  if (vDelimSyms.size()==1) {
    m_iStartSym = vDelimSyms[0];
    return;
  }

  // Zero or several symbols: the delimiter is unusable as a single start marker.
  m_pMsgs->FormatMessageWithString(_("Warning: faulty alphabet definition: training-start delimiter %s must be a single unicode character. May be unable to process training file."),
                                   m_pInfo->m_strConversionTrainStart.c_str());
}

// Chooses which route of `baseSym` a training-file occurrence refers to.
//   bHaveRoute - true if the training file gave a group annotation for this symbol
//   strRoute   - the annotation text (empty when none was given)
//   baseSym    - base symbol read from the training file
// Returns the single route of baseSym whose recorded group is named strRoute,
// or 0 when there is no such route or more than one. In the 0 case a warning
// is sent to m_pMsgs only if an annotation was actually given.
// m_vRoutes.at() throws std::out_of_range if baseSym is not a valid base symbol.
symbol CRoutingAlphMgr::CRoutingTrainer::getRoute(bool bHaveRoute, const string &strRoute, symbol baseSym) {
  const set<symbol> &candidates(m_pMgr->m_vRoutes.at(baseSym));

  // Collect every route whose group name equals the requested name. With no
  // annotation strRoute is empty, so this matches a group with an empty name.
  set<symbol> named;
  for (set<symbol>::const_iterator it=candidates.begin(); it!=candidates.end(); ++it)
    if (const SGroupInfo *g=m_pMgr->m_vGroupsByRoute[*it])
      if (g->strName == strRoute)
        named.insert(*it);

  // Unambiguous match: use it.
  if (named.size()==1) return *(named.begin());

  // Otherwise we will not learn a route - but this is fine, that can be
  // learnt later more-or-less independently.
  // Don't flag a problem if no route was specified.
  if (bHaveRoute) {
    // The alphabet info is indexed by route (alphabet symbol), whereas base
    // symbols are numbered separately by InitMap. Look the display text up
    // through one of this base symbol's routes so the warning names the right
    // character; fall back to baseSym only if it has no routes.
    const symbol displaySym = candidates.empty() ? baseSym : *candidates.begin();
    m_pMsgs->FormatMessageWith2Strings((named.size()==0)
                                       ? _("Warning: training file contains character '%s' as member of group '%s', but no group of that name contains the character. Ignoring group specifier.")
                                       : _("Warning: training file contains character '%s' as member of group '%s', but alphabet contains several such groups. Dasher will not be able to learn how you want to write this character."),
                                         m_pInfo->GetDisplayText(displaySym).c_str(),
                                         strRoute.c_str());
  }

  return 0;
}

// Trains the language model from a stream of symbols read from a training file.
// The stream may contain annotations of the form
//   <train-start delimiter> group-name <train-stop delimiter>
// immediately before a symbol; the annotation is handed to getRoute() to pick
// which route of that symbol to learn. Symbols for which no route can be
// chosen are learnt as base symbols only.
// A fresh empty context is created for the run and released at the end.
void CRoutingAlphMgr::CRoutingTrainer::Train(CAlphabetMap::SymbolStream &syms) {
  CLanguageModel::Context trainContext = m_pLanguageModel->CreateEmptyContext();
  
  // Pending annotation (if any) for the next symbol to be learnt.
  string strRoute; bool bHaveRoute(false);
  // syms.next() yields -1 once the stream is exhausted.
  for (symbol sym; (sym=syms.next(m_pAlphabet))!=-1;) {
    if (sym == m_iStartSym) {
      // m_iStartSym is 0 when the constructor could not resolve the start
      // delimiter to one symbol; 0 is also the unknown symbol, so in that
      // case compare the text just read against the delimiter string instead.
      if (sym!=0 || syms.peekBack()==m_pInfo->m_strConversionTrainStart) {
        if (bHaveRoute)
          // Two annotations in a row: the earlier one is discarded.
          m_pMsgs->FormatMessageWithString(_("Warning: in training file, annotation '<%s>' is followed by another annotation and will be ignored"),
                                           strRoute.c_str());
        strRoute.clear(); bHaveRoute=true;
        // Consume characters up to and including the stop delimiter (or until
        // peekAhead() returns an empty string). The break skips the loop's
        // increment, so the stop delimiter itself is not added to strRoute.
        for (string s; (s=syms.peekAhead()).length(); strRoute+=s) {
          syms.next(m_pAlphabet);
          if (s==m_pInfo->m_strConversionTrainStop) break;
        }
        continue; //read next, hopefully the symbol the annotation applies to
      } //else, unknown symbol, but does not match the start delimiter; fallthrough
    }
    // NOTE: when readEscape() handles the symbol, any pending annotation is
    // kept (bHaveRoute stays set). TODO warn if the annotation is lost?
    if (readEscape(trainContext, sym, syms)) continue;
    //OK, sym is a (base) symbol to learn.
    if (sym) {
      // A non-zero result is a specific route: learn it. Otherwise only the
      // base symbol is learnt.
      if (symbol route = getRoute(bHaveRoute, strRoute, sym))
        m_pLanguageModel->LearnSymbol(trainContext, route);
      else
        static_cast<CRoutingPPMLanguageModel*>(m_pLanguageModel)->LearnBaseSymbol(trainContext, sym);
    } //else, silently drop - as standard CTrainer
    // The annotation applies to one symbol only.
    bHaveRoute=false; strRoute.clear();
  }
  m_pLanguageModel->ReleaseContext(trainContext);
}


// Returns a newly allocated CRoutingTrainer for this manager.
// The trainer reports messages through m_pInterface and takes its language
// model, alphabet info and alphabet map from this manager (see the
// CRoutingTrainer constructor).
// NOTE(review): the object is created with `new` and not retained here, so
// the caller presumably takes ownership - confirm against callers.
CTrainer *CRoutingAlphMgr::GetTrainer() {
  CRoutingTrainer *pTrainer = new CRoutingTrainer(m_pInterface, this);
  return pTrainer;
}