File: hfst_lookup_extensions.cpp

package info (click to toggle)
hfst 3.16.2-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 14,532 kB
  • sloc: cpp: 101,875; sh: 6,717; python: 5,225; yacc: 4,985; lex: 2,900; makefile: 2,017; xml: 6
file content (91 lines) | stat: -rw-r--r-- 3,087 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
namespace hfst {

// Format a collection of one-level paths as text, one path per line.
//
// Each line holds the strings of the path's `second` member written back to
// back with no separator, then a tab, then the path's `first` member
// (presumably the path weight) in the stream's default numeric formatting.
//
// @param paths  the paths to format; visited in the container's iteration order.
// @return       the formatted text; an empty string when `paths` is empty.
std::string one_level_paths_to_string(const hfst::HfstOneLevelPaths & paths)
{
    std::ostringstream text;
    hfst::HfstOneLevelPaths::const_iterator path = paths.begin();
    while (path != paths.end())
    {
        const hfst::StringVector & symbols = path->second;
        for (hfst::StringVector::const_iterator sym = symbols.begin(); sym != symbols.end(); ++sym)
        {
            text << *sym;
        }
        // A plain newline writes the same character std::endl would; the
        // extra flush is pointless on an in-memory string stream.
        text << '\t' << path->first << '\n';
        ++path;
    }
    return text.str();
}

// Reduce two-level paths to one-level paths that keep only the output side.
//
// For every path in `paths`, the `second` member of each string pair is
// collected, in order, into a StringVector. That vector is then inserted into
// the result together with the path's `first` member as a float.
//
// @param paths  the two-level paths to project.
// @return       the container built by one insert() per input path.
//               NOTE(review): whether equal entries collapse depends on the
//               container type behind HfstOneLevelPaths, not visible here.
hfst::HfstOneLevelPaths extract_output_side(const hfst::HfstTwoLevelPaths & paths)
{
    hfst::HfstOneLevelPaths output_paths;
    hfst::HfstTwoLevelPaths::const_iterator path = paths.begin();
    for ( ; path != paths.end(); ++path)
    {
        const hfst::StringPairVector & pairs = path->second;
        hfst::StringVector output_symbols;
        for (hfst::StringPairVector::const_iterator pr = pairs.begin(); pr != pairs.end(); ++pr)
        {
            // Only the second component of each pair is kept.
            output_symbols.push_back(pr->second);
        }
        output_paths.insert(std::pair<float, hfst::StringVector>(path->first, output_symbols));
    }
    return output_paths;
}

// Format a collection of two-level paths as text, one path per line.
//
// For each path the `first` components of its string pairs are concatenated
// into one string and the `second` components into another. The line written
// is "<firsts>:<seconds>", then a tab, then the path's `first` member
// (presumably the path weight) in the stream's default numeric formatting.
//
// @param paths  the paths to format; visited in the container's iteration order.
// @return       the formatted text; an empty string when `paths` is empty.
std::string two_level_paths_to_string(const hfst::HfstTwoLevelPaths & paths)
{
    std::ostringstream text;
    hfst::HfstTwoLevelPaths::const_iterator path = paths.begin();
    for ( ; path != paths.end(); ++path)
    {
        std::string input_side;
        std::string output_side;
        const hfst::StringPairVector & pairs = path->second;
        for (hfst::StringPairVector::const_iterator pr = pairs.begin(); pr != pairs.end(); ++pr)
        {
            input_side += pr->first;
            output_side += pr->second;
        }
        // A plain newline writes the same character std::endl would; the
        // extra flush is pointless on an in-memory string stream.
        text << input_side << ':' << output_side << '\t' << path->first << '\n';
    }
    return text.str();
}

// *** Wrappers for lookup functions *** //

// Look up the already tokenized input `s` in transducer `tr`.
//
// When tr->get_type() is HFST_OL_TYPE or HFST_OLW_TYPE the call is forwarded
// to tr->lookup_fd() (if `fd` is true) or tr->lookup(). The heap-allocated
// result is copied into a local value and deleted before returning.
// For every other type, `tr` is converted to an HfstBasicTransducer, the
// lookup is run on that, and the output side of the resulting two-level
// paths is returned.
//
// @param tr           transducer to query; dereferenced without a null check.
// @param fd           selects lookup_fd() over lookup() for the OL types and is
//                     passed as the last argument of the basic transducer's
//                     lookup() otherwise (presumably "obey flag diacritics").
// @param s            the input as a vector of symbols.
// @param limit        forwarded unchanged to the underlying lookup
//                     (presumably a cap on the number of results; -1 by default).
// @param time_cutoff  forwarded for the OL types only; ignored otherwise.
// @return             the one-level result paths.
//
// The function is declared throw(): an exception escaping from any call made
// here does not reach the caller (by default the program terminates).
HfstOneLevelPaths lookup_vector(const hfst::HfstTransducer * tr, bool fd, const StringVector& s, int limit = -1, double time_cutoff = 0.0) throw()
{
  const bool optimized_lookup =
    (tr->get_type() == hfst::HFST_OL_TYPE) || (tr->get_type() == hfst::HFST_OLW_TYPE);

  if (!optimized_lookup)
    {
      // Generic path: the basic transducer lookup takes no time cutoff.
      (void)time_cutoff;
      hfst::HfstTwoLevelPaths two_level;
      hfst::HfstBasicTransducer basic(*tr);
      basic.lookup(s, two_level, NULL, NULL, limit, fd);
      return hfst::extract_output_side(two_level);
    }

  // Optimized-lookup path: the callee hands back an owning raw pointer.
  HfstOneLevelPaths *heap_paths = NULL;
  if (fd)
    { heap_paths = tr->lookup_fd(s, limit, time_cutoff); }
  else
    { heap_paths = tr->lookup(s, limit, time_cutoff); }

  HfstOneLevelPaths paths(*heap_paths);
  delete heap_paths;
  return paths;
}

// Look up the untokenized string `s` in transducer `tr`.
//
// When tr->get_type() is HFST_OL_TYPE or HFST_OLW_TYPE the string is handed
// directly to tr->lookup_fd() (if `fd` is true) or tr->lookup(). The
// heap-allocated result is copied into a local value and deleted before
// returning.
// For every other type, `tr` is converted to an HfstBasicTransducer. Each of
// its input symbols is registered as a multichar symbol in an HfstTokenizer,
// `s` is tokenized with tokenize_one_level(), the tokens are looked up in the
// basic transducer, and the output side of the resulting two-level paths is
// returned.
//
// @param tr           transducer to query; dereferenced without a null check.
// @param fd           selects lookup_fd() over lookup() for the OL types and is
//                     passed as the last argument of the basic transducer's
//                     lookup() otherwise (presumably "obey flag diacritics").
// @param s            the input string.
// @param limit        forwarded unchanged to the underlying lookup
//                     (presumably a cap on the number of results; -1 by default).
// @param time_cutoff  forwarded for the OL types only; ignored otherwise.
// @return             the one-level result paths.
//
// The function is declared throw(): an exception escaping from any call made
// here does not reach the caller (by default the program terminates).
HfstOneLevelPaths lookup_string(const hfst::HfstTransducer * tr, bool fd, const std::string& s, int limit = -1, double time_cutoff = 0.0) throw()
{
  const bool optimized_lookup =
    (tr->get_type() == hfst::HFST_OL_TYPE) || (tr->get_type() == hfst::HFST_OLW_TYPE);

  if (!optimized_lookup)
    {
      // Generic path: the basic transducer lookup takes no time cutoff.
      (void)time_cutoff;

      hfst::HfstBasicTransducer basic(*tr);
      hfst::StringSet input_symbols = basic.get_input_symbols();

      // Register every input symbol of the transducer with the tokenizer
      // before splitting the string.
      hfst::HfstTokenizer tokenizer;
      hfst::StringSet::const_iterator symbol = input_symbols.begin();
      while (symbol != input_symbols.end())
        {
          tokenizer.add_multichar_symbol(*symbol);
          ++symbol;
        }

      StringVector tokens = tokenizer.tokenize_one_level(s);
      hfst::HfstTwoLevelPaths two_level;
      basic.lookup(tokens, two_level, NULL, NULL, limit, fd);
      return hfst::extract_output_side(two_level);
    }

  // Optimized-lookup path: the callee hands back an owning raw pointer.
  HfstOneLevelPaths *heap_paths = NULL;
  if (fd)
    { heap_paths = tr->lookup_fd(s, limit, time_cutoff); }
  else
    { heap_paths = tr->lookup(s, limit, time_cutoff); }

  HfstOneLevelPaths paths(*heap_paths);
  delete heap_paths;
  return paths;
}

}