1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93
|
#include "parsers/PmatchCompiler.h"
namespace hfst {
//hfst::ImplementationType get_default_fst_type();
// Mostly copied from file 'tools/src/hfst-pmatch2fst.cc'.
// TODO: HfstTransducer pointers in variable 'definitions' need to be deleted manually?
std::vector<hfst::HfstTransducer> compile_pmatch_expression(const std::string & pmatch)
{
std::vector<hfst::HfstTransducer> retval;
hfst::pmatch::PmatchCompiler comp(hfst::TROPICAL_OPENFST_TYPE);
comp.set_verbose(false/*verbose*/);
comp.set_flatten(false/*flatten*/);
std::map<std::string, hfst::HfstTransducer*> definitions;
try {
definitions = comp.compile(pmatch);
} catch(...) {
throw HfstException(); // TODO
}
// A dummy transducer with an alphabet with all the symbols
hfst::HfstTransducer harmonizer(hfst::TROPICAL_OPENFST_TYPE);
// First we need to collect a unified alphabet from all the transducers.
hfst::StringSet symbols_seen;
for (std::map<std::string, hfst::HfstTransducer *>::const_iterator it =
definitions.begin(); it != definitions.end(); ++it) {
hfst::StringSet string_set = it->second->get_alphabet();
for (hfst::StringSet::const_iterator sym = string_set.begin();
sym != string_set.end(); ++sym) {
if (symbols_seen.count(*sym) == 0) {
harmonizer.disjunct(hfst::HfstTransducer(*sym, hfst::TROPICAL_OPENFST_TYPE));
symbols_seen.insert(*sym);
}
}
}
if (symbols_seen.size() == 0) {
// We don't recognise anything, go home early
std::cerr << "Empty ruleset, nothing to write\n";
throw HfstException(); // TODO
}
// Then we convert it...
harmonizer.convert(hfst::HFST_OLW_TYPE);
// Use these for naughty intermediate steps to make sure
// everything has the same alphabet
hfst::HfstBasicTransducer * intermediate_tmp;
hfst_ol::Transducer * harmonized_tmp;
hfst::HfstTransducer * output_tmp;
// When done compiling everything, look for TOP and output it first.
if (definitions.count("TOP") == 1) {
intermediate_tmp = hfst::implementations::ConversionFunctions::
hfst_transducer_to_hfst_basic_transducer(*definitions["TOP"]);
harmonized_tmp = hfst::implementations::ConversionFunctions::
hfst_basic_transducer_to_hfst_ol(intermediate_tmp,
true, // weighted
"", // no special options
&harmonizer); // harmonize with this
output_tmp = hfst::implementations::ConversionFunctions::
hfst_ol_to_hfst_transducer(harmonized_tmp);
output_tmp->set_name("TOP");
retval.push_back(*output_tmp);
delete definitions["TOP"];
definitions.erase("TOP");
delete intermediate_tmp;
delete output_tmp;
for (std::map<std::string, hfst::HfstTransducer *>::iterator it =
definitions.begin(); it != definitions.end(); ++it) {
intermediate_tmp = hfst::implementations::ConversionFunctions::
hfst_transducer_to_hfst_basic_transducer(*(it->second));
harmonized_tmp = hfst::implementations::ConversionFunctions::
hfst_basic_transducer_to_hfst_ol(intermediate_tmp,
true, // weighted
"", // no special options
&harmonizer); // harmonize with this
output_tmp = hfst::implementations::ConversionFunctions::
hfst_ol_to_hfst_transducer(harmonized_tmp);
output_tmp->set_name(it->first);
retval.push_back(*output_tmp);
delete it->second;
delete intermediate_tmp;
delete output_tmp;
}
} else {
std::cerr << "Empty ruleset, nothing to write\n";
throw HfstException(); // TODO
}
return retval;
}
}
|