File: hfst_pmatch_extensions.cpp

package info (click to toggle)
hfst 3.16.2-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 14,532 kB
  • sloc: cpp: 101,875; sh: 6,717; python: 5,225; yacc: 4,985; lex: 2,900; makefile: 2,017; xml: 6
file content (93 lines) | stat: -rw-r--r-- 4,068 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
#include "parsers/PmatchCompiler.h"

namespace hfst {

  //hfst::ImplementationType get_default_fst_type();

// Mostly copied from file 'tools/src/hfst-pmatch2fst.cc'.
// TODO: HfstTransducer pointers in variable 'definitions' need to be deleted manually?
std::vector<hfst::HfstTransducer> compile_pmatch_expression(const std::string & pmatch)
{
    std::vector<hfst::HfstTransducer> retval;
    hfst::pmatch::PmatchCompiler comp(hfst::TROPICAL_OPENFST_TYPE);
    comp.set_verbose(false/*verbose*/);
    comp.set_flatten(false/*flatten*/);
    std::map<std::string, hfst::HfstTransducer*> definitions;
    try {
      definitions = comp.compile(pmatch);
    } catch(...) {
      throw HfstException(); // TODO
    }

    // A dummy transducer with an alphabet with all the symbols
    hfst::HfstTransducer harmonizer(hfst::TROPICAL_OPENFST_TYPE);

    // First we need to collect a unified alphabet from all the transducers.
    hfst::StringSet symbols_seen;
    for (std::map<std::string, hfst::HfstTransducer *>::const_iterator it =
             definitions.begin(); it != definitions.end(); ++it) {
        hfst::StringSet string_set = it->second->get_alphabet();
        for (hfst::StringSet::const_iterator sym = string_set.begin();
             sym != string_set.end(); ++sym) {
            if (symbols_seen.count(*sym) == 0) {
              harmonizer.disjunct(hfst::HfstTransducer(*sym, hfst::TROPICAL_OPENFST_TYPE));
                symbols_seen.insert(*sym);
            }
        }
    }
    if (symbols_seen.size() == 0) {
        // We don't recognise anything, go home early
        std::cerr << "Empty ruleset, nothing to write\n";
        throw HfstException(); // TODO
    }

    // Then we convert it...
    harmonizer.convert(hfst::HFST_OLW_TYPE);
    // Use these for naughty intermediate steps to make sure
    // everything has the same alphabet
    hfst::HfstBasicTransducer * intermediate_tmp;
    hfst_ol::Transducer * harmonized_tmp;
    hfst::HfstTransducer * output_tmp;

    // When done compiling everything, look for TOP and output it first.
    if (definitions.count("TOP") == 1) {
        intermediate_tmp = hfst::implementations::ConversionFunctions::
            hfst_transducer_to_hfst_basic_transducer(*definitions["TOP"]);
        harmonized_tmp = hfst::implementations::ConversionFunctions::
            hfst_basic_transducer_to_hfst_ol(intermediate_tmp,
                                             true, // weighted
                                             "", // no special options
                                             &harmonizer); // harmonize with this
        output_tmp = hfst::implementations::ConversionFunctions::
            hfst_ol_to_hfst_transducer(harmonized_tmp);
        output_tmp->set_name("TOP");
        retval.push_back(*output_tmp);
        delete definitions["TOP"];
        definitions.erase("TOP");
        delete intermediate_tmp;
        delete output_tmp;

        for (std::map<std::string, hfst::HfstTransducer *>::iterator it =
                 definitions.begin(); it != definitions.end(); ++it) {
            intermediate_tmp = hfst::implementations::ConversionFunctions::
                hfst_transducer_to_hfst_basic_transducer(*(it->second));
            harmonized_tmp = hfst::implementations::ConversionFunctions::
                hfst_basic_transducer_to_hfst_ol(intermediate_tmp,
                                                 true, // weighted
                                                 "", // no special options
                                                 &harmonizer); // harmonize with this
            output_tmp = hfst::implementations::ConversionFunctions::
                hfst_ol_to_hfst_transducer(harmonized_tmp);
            output_tmp->set_name(it->first);
            retval.push_back(*output_tmp);
            delete it->second;
            delete intermediate_tmp;
            delete output_tmp;
        }
    } else {
        std::cerr << "Empty ruleset, nothing to write\n";
        throw HfstException(); // TODO
    }
    return retval;
 }
}