1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67
|
namespace hfst
{
// Convenience wrapper: forwards to PmatchContainer::locate(input, time_cutoff)
// on the container that `cont` points to and returns its result unchanged.
//
// cont        - container to query; dereferenced unconditionally, so it
//               must not be null.
// input       - text handed to locate() as-is.
// time_cutoff - passed straight through; defaults to 0.0 when omitted.
//               NOTE(review): presumably a time limit for matching --
//               confirm the unit and the meaning of 0.0 against
//               PmatchContainer::locate.
hfst_ol::LocationVectorVector pmatch_locate
(hfst_ol::PmatchContainer * cont,
 const std::string & input,
 double time_cutoff = 0.0)
{
    hfst_ol::PmatchContainer & matcher = *cont;
    return matcher.locate(input, time_cutoff);
}
// Overload of pmatch_locate that additionally forwards a weight cutoff:
// calls PmatchContainer::locate(input, time_cutoff, weight_cutoff) on the
// container that `cont` points to and returns its result unchanged.
//
// cont          - container to query; dereferenced unconditionally, so it
//                 must not be null.
// input         - text handed to locate() as-is.
// time_cutoff   - passed straight through (no default in this overload).
// weight_cutoff - passed straight through.
//                 NOTE(review): the exact semantics of both cutoffs are
//                 defined by PmatchContainer::locate -- confirm there.
hfst_ol::LocationVectorVector pmatch_locate
(hfst_ol::PmatchContainer * cont,
 const std::string & input,
 double time_cutoff,
 float weight_cutoff)
{
    hfst_ol::PmatchContainer & matcher = *cont;
    return matcher.locate(input, time_cutoff, weight_cutoff);
}
// Namespace-scope buffer that pmatch_get_tokenized_output() prints into.
// It is reset at the start of every call and still holds the text of the
// most recent call afterwards. Because it is shared state, concurrent calls
// to pmatch_get_tokenized_output() would interleave their output.
std::ostringstream pmatch_tokenize_ostringstream;

// Runs hfst_ol_tokenize::match_and_print() on `input_text` with the given
// container and returns everything it printed as a string.
//
// cont               - container handed to match_and_print(); dereferenced
//                      unconditionally, so it must not be null.
// input_text         - text passed to match_and_print().
// output_format      - one of "tokenize", "space_separated", "xerox", "cg",
//                      "finnpos", "giellacg", "conllu"; selects the
//                      corresponding hfst_ol_tokenize::OutputFormat value.
// max_weight_classes - optional limit; a null pointer means
//                      std::numeric_limits<int>::max(), otherwise the
//                      pointed-to value is used.
// dedupe, print_weights, print_all, time_cutoff, verbose, beam,
// tokenize_multichar - copied verbatim into the same-named fields of
//                      hfst_ol_tokenize::TokenizeSettings.
//                      NOTE(review): their effect is defined by
//                      match_and_print() -- confirm there.
//
// Throws the string literal "output_format not recognized" (a const char*)
// when `output_format` is not one of the names listed above.
std::string pmatch_get_tokenized_output
(hfst_ol::PmatchContainer * cont,
 const std::string & input_text,
 const std::string & output_format,
 int * max_weight_classes,
 bool dedupe,
 bool print_weights,
 bool print_all,
 double time_cutoff,
 bool verbose,
 float beam,
 bool tokenize_multichar)
{
    // str("") only empties the buffer; it leaves failbit/badbit untouched.
    // Without clear(), one failed insertion during an earlier call would make
    // every later call silently return an empty string.
    pmatch_tokenize_ostringstream.str("");
    pmatch_tokenize_ostringstream.clear();

    hfst_ol_tokenize::TokenizeSettings settings;

    // Map the textual format name onto the tokenizer's enum.
    if (output_format == "tokenize") {
        settings.output_format = hfst_ol_tokenize::OutputFormat::tokenize;
    } else if (output_format == "space_separated") {
        settings.output_format = hfst_ol_tokenize::OutputFormat::space_separated;
    } else if (output_format == "xerox") {
        settings.output_format = hfst_ol_tokenize::OutputFormat::xerox;
    } else if (output_format == "cg") {
        settings.output_format = hfst_ol_tokenize::OutputFormat::cg;
    } else if (output_format == "finnpos") {
        settings.output_format = hfst_ol_tokenize::OutputFormat::finnpos;
    } else if (output_format == "giellacg") {
        settings.output_format = hfst_ol_tokenize::OutputFormat::giellacg;
    } else if (output_format == "conllu") {
        settings.output_format = hfst_ol_tokenize::OutputFormat::conllu;
    } else {
        // Kept as a thrown string literal so existing handlers that catch
        // const char* keep working.
        throw "output_format not recognized";
    }

    // A null pointer stands for "no limit".
    if (max_weight_classes == nullptr) {
        settings.max_weight_classes = std::numeric_limits<int>::max();
    } else {
        settings.max_weight_classes = *max_weight_classes;
    }

    settings.dedupe = dedupe;
    settings.print_weights = print_weights;
    settings.print_all = print_all;
    settings.time_cutoff = time_cutoff;
    settings.verbose = verbose;
    settings.beam = beam;
    settings.tokenize_multichar = tokenize_multichar;

    hfst_ol_tokenize::match_and_print(*cont, pmatch_tokenize_ostringstream, input_text, settings);
    return pmatch_tokenize_ostringstream.str();
}
}
|