File: lstmbe.h

package info (click to toggle)

swiftlang 6.0.3-2

links: PTS, VCS
area: main
in suites: forky, sid, trixie
size: 2,519,992 kB
sloc: cpp: 9,107,863; ansic: 2,040,022; asm: 1,135,751; python: 296,500; objc: 82,456; f90: 60,502; lisp: 34,951; pascal: 19,946; sh: 18,133; perl: 7,482; ml: 4,937; javascript: 4,117; makefile: 3,840; awk: 3,535; xml: 914; fortran: 619; cs: 573; ruby: 573

file content (88 lines) | stat: -rw-r--r-- 2,680 bytes

// © 2021 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html

#ifndef LSTMBE_H
#define LSTMBE_H

#include <_foundation_unicode/utypes.h>

#if !UCONFIG_NO_BREAK_ITERATION

#include <_foundation_unicode/uniset.h>
#include <_foundation_unicode/ures.h>
#include <_foundation_unicode/utext.h>
#include <_foundation_unicode/utypes.h>

#include "brkeng.h"
#include "dictbe.h"
#include "uvectr32.h"

U_NAMESPACE_BEGIN

class Vectorizer;
struct LSTMData;

/*******************************************************************
 * LSTMBreakEngine
 */

/**
 * <p>LSTMBreakEngine is a kind of DictionaryBreakEngine that uses a
 * LSTM to determine language-specific breaks.</p>
 *
 * <p>After it is constructed a LSTMBreakEngine may be shared between
 * threads without synchronization.</p>
 */
class LSTMBreakEngine : public DictionaryBreakEngine {
public:
    /**
     * <p>Constructor.</p>
     */
    LSTMBreakEngine(const LSTMData* data, const UnicodeSet& set, UErrorCode &status);

    /**
     * <p>Virtual destructor.</p>
     */
    virtual ~LSTMBreakEngine();

    virtual const char16_t* name() const;

protected:
    /**
     * <p>Divide up a range of known dictionary characters handled by this break engine.</p>
     *
     * @param text A UText representing the text
     * @param rangeStart The start of the range of dictionary characters
     * @param rangeEnd The end of the range of dictionary characters
     * @param foundBreaks Output of C array of int32_t break positions, or 0
     * @param status Information on any errors encountered.
     * @return The number of breaks found
     */
     virtual int32_t divideUpDictionaryRange(UText *text,
                                             int32_t rangeStart,
                                             int32_t rangeEnd,
                                             UVector32 &foundBreaks,
                                             UBool isPhraseBreaking,
                                             UErrorCode& status) const override;
private:
    const LSTMData* fData;
    const Vectorizer* fVectorizer;
};

U_CAPI const LanguageBreakEngine* U_EXPORT2 CreateLSTMBreakEngine(
    UScriptCode script, const LSTMData* data, UErrorCode& status);

U_CAPI const LSTMData* U_EXPORT2 CreateLSTMData(
    UResourceBundle* rb, UErrorCode& status);

U_CAPI const LSTMData* U_EXPORT2 CreateLSTMDataForScript(
    UScriptCode script, UErrorCode& status);

U_CAPI void U_EXPORT2 DeleteLSTMData(const LSTMData* data);
U_CAPI const char16_t* U_EXPORT2 LSTMDataName(const LSTMData* data);

U_NAMESPACE_END

#endif /* #if !UCONFIG_NO_BREAK_ITERATION */

#endif  /* LSTMBE_H */