1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169
|
/*
* Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* @file picokfst.h
*
* FST knowledge loading and access
*
* Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland
* All rights reserved.
*
* History:
* - 2009-04-20 -- initial version
*
*/
#ifndef PICOKFST_H_
#define PICOKFST_H_
#include "picodefs.h"
#include "picodbg.h"
#include "picoos.h"
#include "picoknow.h"
#ifdef __cplusplus
extern "C" {
#endif
#if 0
}
#endif
/* Scalar types used by the FST access functions; all three are aliases of
 * picoos_int16. */

/* type of symbol identifiers */
typedef picoos_int16 picokfst_symid_t;

/* type of symbol pair classes */
typedef picoos_int16 picokfst_class_t;

/* type of states */
typedef picoos_int16 picokfst_state_t;
/* Reserved symbol ids.
 *
 * Each expansion is wrapped in parentheses so that the cast always stays
 * attached to its operand, whatever operator surrounds the macro use.
 * Without the outer parentheses, 'sizeof PICOKFST_SYMID_ILLEG' is parsed as
 * 'sizeof(picokfst_symid_t) - 1' and 'sizeof PICOKFST_SYMID_EPS' does not
 * compile at all. */
#define PICOKFST_SYMID_EPS   ((picokfst_symid_t) 0)  /* epsilon symbol id */
#define PICOKFST_SYMID_ILLEG ((picokfst_symid_t) -1) /* illegal symbol id */
/**
 * @addtogroup picokfst
 *
 * Symbol planes of the FST symbol id space. \n
 * A value is turned into an FST symbol id by adding 256 times the number of
 * its plane (relevant for compiling the FST): \n
 *
 * Value               FST symbol id \n
 * -------------------------------------- \n
 * phoneme_id      ->  phoneme_id     + 256 * PICOKFST_PLANE_PHONEMES \n
 * accentlevel_id  ->  accentlevel_id + 256 * PICOKFST_PLANE_ACCENTS \n
 * POS_id          ->  POS_id         + 256 * PICOKFST_PLANE_POS \n
 * pb_strength_id  ->  pb_strength_id + 256 * PICOKFST_PLANE_PB_STRENGTHS \n
 * phon_term_id    ->  phon_term_id   + 256 * PICOKFST_PLANE_INTERN \n
 *
 * The plane numbers are given explicitly; no plane with number 3 is
 * declared here.
 */
enum picokfst_symbol_plane {
    /* phoneme plane */
    PICOKFST_PLANE_PHONEMES     = 0,
    /* "ascii" plane (values > 127 may be used internally) */
    PICOKFST_PLANE_ASCII        = 1,
    /* x-sampa primitives plane (pico-specific table) */
    PICOKFST_PLANE_XSAMPA       = 2,
    /* accent plane */
    PICOKFST_PLANE_ACCENTS      = 4,
    /* part of speech plane */
    PICOKFST_PLANE_POS          = 5,
    /* phrase boundary strength plane */
    PICOKFST_PLANE_PB_STRENGTHS = 6,
    /* internal plane, e.g. phonStartId, phonTermId */
    PICOKFST_PLANE_INTERN       = 7
};
/* Transduction mode flags. The enumerators are single bits and are meant to
 * be combined into a bit set, e.g.
 *   picoos_uint8 transductionMode = PICOKFST_TRANSMODE_NEWSYMS | PICOKFST_TRANSMODE_POSUSED;
 */
enum picofst_transduction_mode {
    /* e.g. {#WB},{#PB-S},{#PB-W},{#ACC0},{#ACC1},{#ACC2},{#ACC3} */
    PICOKFST_TRANSMODE_NEWSYMS = 0x01,
    /* FST contains Part Of Speech symbols */
    PICOKFST_TRANSMODE_POSUSED = 0x02
};
/* ************************************************************/
/* function to create specialized kb, */
/* to be used by knowledge layer (picorsrc) only */
/* ************************************************************/
/**
 * Calculates a small number of data (e.g. addresses) from the knowledge base
 * for fast access. This data is encapsulated in a picokfst_FST that can
 * later be retrieved with picokfst_getFST.
 *
 * @param this    knowledge base to be specialized
 * @param common  NOTE(review): not described in this header; presumably the
 *                shared picoos context used by the implementation - confirm
 * @return        status of the operation as a pico_status_t
 */
pico_status_t picokfst_specializeFSTKnowledgeBase(
        picoknow_KnowledgeBase this,
        picoos_Common common);
/* ************************************************************/
/* FST type and getFST function */
/* ************************************************************/
/* FST type: a pointer to struct picokfst_fst, which is only forward-declared
 * (not defined) in this header. */
typedef struct picokfst_fst *picokfst_FST;
/**
 * Returns the FST of a knowledge base for usage in a PU.
 *
 * @param this  knowledge base holding the FST
 * @return      the FST of 'this'
 */
picokfst_FST picokfst_getFST(picoknow_KnowledgeBase this);
/* ************************************************************/
/* FST access methods */
/* ************************************************************/
/**
 * Returns the transduction mode specified with the rule sources.
 *
 * @param this  FST to query
 * @return      value to be interpreted as a set of
 *              picofst_transduction_mode flags
 */
picoos_uint8 picokfst_kfstGetTransductionMode(picokfst_FST this);
/**
 * Returns the number of states and the number of pair classes in the FST.
 * Legal states are 1..nrStates, legal classes are 1..nrClasses.
 *
 * @param this       FST to query
 * @param nrStates   '*nrStates' returns the number of states
 * @param nrClasses  '*nrClasses' returns the number of pair classes
 */
void picokfst_kfstGetFSTSizes(picokfst_FST this,
                              picoos_int32 *nrStates,
                              picoos_int32 *nrClasses);
/**
 * Starts the search for all pairs with input symbol 'inSym'. The symbol
 * pairs themselves must be fetched with 'picokfst_kfstGetNextPair'.
 *
 * @param this         FST to search
 * @param inSym        input symbol whose pairs are searched
 * @param inSymFound   '*inSymFound' returns whether such pairs exist at all
 * @param searchState  '*searchState' returns a search state to be used in
 *                     subsequent calls to 'picokfst_kfstGetNextPair'
 */
void picokfst_kfstStartPairSearch(picokfst_FST this,
                                  picokfst_symid_t inSym,
                                  picoos_bool *inSymFound,
                                  picoos_int32 *searchState);
/**
 * Gets the next pair for the input symbol specified with the preceding call
 * to 'picokfst_kfstStartPairSearch'.
 *
 * @param this         FST to search
 * @param searchState  '*searchState' maintains the search state
 * @param pairFound    '*pairFound' returns whether any more pair was found
 * @param outSym       '*outSym' returns the output symbol of the found pair
 * @param pairClass    '*pairClass' returns the transition class of the
 *                     found symbol pair
 */
void picokfst_kfstGetNextPair(picokfst_FST this,
                              picoos_int32 *searchState,
                              picoos_bool *pairFound,
                              picokfst_symid_t *outSym,
                              picokfst_class_t *pairClass);
/**
 * Attempts to do an FST transition from state 'startState' with pair class
 * 'transClass'.
 *
 * @param this        FST to use
 * @param startState  state the transition starts in
 * @param transClass  pair class of the transition
 * @param endState    if such a transition exists, '*endState' returns the
 *                    end state of the transition (> 0); otherwise
 *                    '*endState' returns a value <= 0
 */
void picokfst_kfstGetTrans(picokfst_FST this,
                           picokfst_state_t startState,
                           picokfst_class_t transClass,
                           picokfst_state_t *endState);
/**
 * Starts the search for all pairs with input epsilon symbol and all
 * corresponding FST transitions starting in state 'startState'; to be used
 * for fast computation of epsilon closures.
 *
 * @param this             FST to search
 * @param startState       state in which the searched transitions start
 * @param inEpsTransFound  '*inEpsTransFound' returns whether any such
 *                         transition was found at all
 * @param searchState      if a transition was found, '*searchState' returns
 *                         a search state to be used in subsequent calls to
 *                         'picokfst_kfstGetNextInEpsTrans'
 */
void picokfst_kfstStartInEpsTransSearch(picokfst_FST this,
                                        picokfst_state_t startState,
                                        picoos_bool *inEpsTransFound,
                                        picoos_int32 *searchState);
/**
 * Gets the next FST transition with a pair with empty input symbol, starting
 * from the state previously specified in
 * 'picokfst_kfstStartInEpsTransSearch'.
 *
 * @param this             FST to search
 * @param searchState      '*searchState' maintains the search state
 * @param inEpsTransFound  '*inEpsTransFound' returns whether a new
 *                         transition with input epsilon was found
 * @param outSym           '*outSym' returns the output symbol of the found
 *                         pair
 * @param endState         '*endState' returns the end state of the found
 *                         transition with that pair
 */
void picokfst_kfstGetNextInEpsTrans(picokfst_FST this,
                                    picoos_int32 *searchState,
                                    picoos_bool *inEpsTransFound,
                                    picokfst_symid_t *outSym,
                                    picokfst_state_t *endState);
/**
 * Returns whether 'state' is an accepting state of the FST.
 *
 * Originally, only state 1 was an accepting state. However, in order to
 * remove the need to always do a last transition with a termination symbol
 * pair, this function defines a state as an accepting state if there is a
 * transition to state 1 with the terminator symbol pair.
 *
 * @param this   FST to query
 * @param state  state to be checked
 * @return       whether 'state' is an accepting state in the sense above
 */
picoos_bool picokfst_kfstIsAcceptingState(picokfst_FST this,
                                          picokfst_state_t state);
#ifdef __cplusplus
}
#endif
#endif /*PICOKFST_H_*/
|