File: mmEM.h

package info (click to toggle)
m2m-aligner 1.2-2
  • links: PTS, VCS
  • area: main
  • in suites: experimental
  • size: 5,348 kB
  • sloc: cpp: 4,709; sh: 1,144; xml: 944; makefile: 292
file content (137 lines) | stat: -rw-r--r-- 3,478 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
/********************************************************************
*
* file: mmEM.h
*
* Copyright (c) 2007, Sittichai Jiampojamarn
* All rights reserved.
* 
* See the file COPYING in the top directory of this distribution
* for more information.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE
* OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*********************************************************************/

#pragma once

#include "param.h"
#include "util.h"
#include <map>
#include <vector>
#include <string>
#include <fstream>
#include <iostream>
#include <cmath>
#include <cstdlib>
#include <algorithm>
#include <climits>
#include <set>

// Requires STLport: <hash_map> is included only when USESTLPORT is defined. //
#ifdef USESTLPORT
#include <hash_map>
#endif

using namespace std;

// Shorthand names for the one- and two-level nested vectors used in this
// header. A "2" in the name marks a vector of vectors of the element type.

// Elements are long double (despite the "Double" in the alias name).
typedef vector<long double> vector_Double;
typedef vector<vector_Double> vector_2Double;

// Elements are int.
typedef vector<int> vector_int;
typedef vector<vector_int> vector_2int;

// Elements are std::string.
typedef vector<string> vector_str;
typedef vector<vector_str> vector_2str;

// One scored table entry: a long double score plus three int fields.
// Both the tag name QTABLE and the alias qtable refer to this type.
// NOTE(review): backX / backY / backR look like back-pointer indices used
// when tracing an alignment back through a table -- confirm in mmEM.cpp.
struct QTABLE
{
	long double score;	// the value DqSortedFn compares
	int backX;
	int backY;
	int backR;
};

typedef QTABLE qtable;

// One-, two- and three-level nested vectors of qtable entries.
typedef vector<qtable> vector_qtable;
typedef vector<vector_qtable> vector_2qtable;
typedef vector<vector_2qtable> vector_3qtable;

 // Equality predicate over strings. It is the key-equality functor named in
 // the hash_map typedefs that are selected when USESTLPORT is defined.
 struct eqstr
{
  // Returns true when s1 and s2 hold exactly the same character sequence.
  // Both arguments are taken by const reference so that a key comparison
  // never copies a string.
  bool operator()(const string &s1, const string &s2) const
  {
          return s1 == s2;
  }
};

 inline bool DqSortedFn (qtable i, qtable j)
{
	return (i.score > j.score);
}

 // A pair of strings together with a long double probability.
 // NOTE(review): presumably one row of the initialisation table handled by
 // mmEM::readInitFile -- confirm against the implementation file.
 typedef struct INITTABLE
 {
	 string xstring;
	 string ystring;
	 long double prob;	// the value initTableSortedFn compares
 } initTable;

 // Ordering predicate for initTable entries.
 // Returns true when i.prob is strictly greater than j.prob, so a sort that
 // uses this predicate places higher probabilities first.
 // The entries are taken by const reference: initTable holds two strings, and
 // passing it by value would copy both strings of both operands on every
 // single comparison.
 inline bool initTableSortedFn(const initTable &i, const initTable &j)
 {
	 return (i.prob > j.prob);
 }

 // A sequence of initTable entries.
 typedef vector<initTable> vector_initTable;

 // The program is a lot faster when using STLPORT //

// String-keyed lookup tables.
//   hash_StrDouble  : string -> long double
//   hash_2StrDouble : string -> hash_StrDouble (a two-level table)
// The underlying container is chosen at compile time by USESTLPORT.
#ifndef USESTLPORT
// Standard library build: ordered map.
typedef map<string, long double> hash_StrDouble;
typedef map<string, hash_StrDouble> hash_2StrDouble;
#else
// STLport build: hash_map hashed with hash<string> and compared with eqstr.
typedef hash_map<string, long double, hash<string>, eqstr> hash_StrDouble;
typedef hash_map<string, hash_StrDouble, hash<string>, eqstr> hash_2StrDouble;
#endif


// Large negative constant (a double literal).
// NOTE(review): presumably stands in for the log of a zero probability --
// confirm against its uses in the implementation file.
// The replacement list is parenthesised so the sign always stays attached to
// the number wherever the macro is expanded.
#define LOWLOGPROB (-1e12)

// Aligner class. This header only declares the members; every member function
// is defined elsewhere (presumably mmEM.cpp), so the notes below describe the
// signatures and hedge anything that relies on the names alone.
//
// NOTE(review): every parameter of type param, vector_str and vector_2str is
// taken by value, so each call copies its arguments. Switching to const
// references would have to be done together with the out-of-line definitions.
class mmEM
{
private:
	// Two-level string-keyed tables of long double values.
	// NOTE(review): presumably probs holds the current model probabilities and
	// counts the accumulated counts of the running EM iteration -- confirm.
	hash_2StrDouble probs;
	hash_2StrDouble counts;

	// A set of strings.
	// NOTE(review): its role is not visible from this header -- confirm.
	set<string> limitSet;

public:
	mmEM();
	~mmEM();

	// NOTE(review): the names suggest the usual EM steps (initialise,
	// expectation over one x / y pair, maximisation, and forward / backward
	// table evaluation) driven by training() -- confirm in the implementation.
	void training(param myParam);
	void initialization(param myParam, vector_2str stringX, vector_2str stringY);
	long double maximization(param myParam);
	bool expectation(param myParam, vector_str x, vector_str y);
	// Both evaluators return a two-level table of long double values.
	vector_2Double forwardEval(param myParam, vector_str x, vector_str y);
	vector_2Double backwardEval(param myParam, vector_str x, vector_str y);

	// Alignment output.
	// viterbi_align writes through the alignX / alignY pointers, and
	// nViterbi_align through the alignX / alignY references.
	// NOTE(review): presumably single-best versus n-best alignment -- confirm.
	void createAlignments(param myParam);
	long double viterbi_align(param myParam, vector_str x, vector_str y, vector<string> *alignX, vector<string> *alignY);
	vector<long double> nViterbi_align(param myParam, vector_str x, vector_str y, vector_2str &alignX, vector_2str &alignY);

	void printAlphaBeta(vector_2Double alpha);
	// NOTE(review): "Alinger" is a misspelling of "Aligner"; the name is kept
	// as-is for compatibility with existing callers and the definition.
	void writeAlingerToFile(param myParam);
	void readAlignerFromFile(param myParam);

	// Takes a file name and two output pointers, wordX and wordY.
	void readFileXY(param myParam, string filename, vector_2str *wordX, vector_2str *wordY);

	void readInitFile(param myParam);
};