1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252
|
// © 2024 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
#include "unicode/utypes.h"
#ifndef U_HIDE_DEPRECATED_API
#ifndef MESSAGEFORMAT_PARSER_H
#define MESSAGEFORMAT_PARSER_H
#include "unicode/messageformat2_data_model.h"
#include "unicode/parseerr.h"
#include "unicode/uniset.h"
#include "messageformat2_allocation.h"
#include "messageformat2_errors.h"
#if U_SHOW_CPLUSPLUS_API
#if !UCONFIG_NO_NORMALIZATION
#if !UCONFIG_NO_FORMATTING
#if !UCONFIG_NO_MF2
U_NAMESPACE_BEGIN
namespace message2 {
using namespace data_model;
// Used for parameterizing options parsing code
// over the two builders that use it (Operator and Markup)
template <class T>
class OptionAdder {
public:
    // Binds the adapter to the builder that will receive the options.
    // The builder is held by reference, so it must outlive this object.
    OptionAdder(T& b) : target(b) {}

    // Hands one option (name `k`, value `r`) to the wrapped builder's
    // addOption(). The operand is moved into the builder; `s` is passed
    // through to the builder untouched.
    void addOption(const UnicodeString& k, Operand&& r, UErrorCode& s) {
        target.addOption(k, std::move(r), s);
    }

private:
    // The builder every addOption() call is forwarded to.
    T& target;
};
// Used for parameterizing attributes parsing code
// over the two builders that use it (Expression and Markup)
// Unfortunately the same OptionAdder class can't just be reused,
// because duplicate options are forbidden while duplicate attributes are not
template <class T>
class AttributeAdder {
public:
    // Binds the adapter to the builder that will receive the attributes.
    // The builder is held by reference, so it must outlive this object.
    AttributeAdder(T& b) : target(b) {}

    // Hands one attribute (name `k`, value `r`) to the wrapped builder's
    // addAttribute(). The operand is moved into the builder; `s` is passed
    // through to the builder untouched.
    void addAttribute(const UnicodeString& k, Operand&& r, UErrorCode& s) {
        target.addAttribute(k, std::move(r), s);
    }

private:
    // The builder every addAttribute() call is forwarded to.
    T& target;
};
// Initialization of UnicodeSets
namespace unisets {
// Keys naming the UnicodeSets the parser uses to classify characters.
// The Parser constructor passes each key except UNISETS_KEY_COUNT to get()
// and stores the result in the Parser member noted beside the key.
// Enumerators take their default values, so the order here determines
// each key's numeric value.
enum Key {
// Stored in Parser::contentChars
CONTENT,
// Stored in Parser::whitespaceChars
WHITESPACE,
// Stored in Parser::bidiControlChars
BIDI,
// Stored in Parser::alphaChars
ALPHA,
// Stored in Parser::digitChars
DIGIT,
// Stored in Parser::nameStartChars
NAME_START,
// Stored in Parser::nameChars
NAME_CHAR,
// Stored in Parser::textChars
TEXT,
// Stored in Parser::quotedChars
QUOTED,
// Stored in Parser::escapableChars
ESCAPABLE,
// Not a set: as the last enumerator, its value equals the number of keys above.
UNISETS_KEY_COUNT
};
// Returns a pointer to the set identified by `key`.
// NOTE(review): the definition is not in this header; presumably failures are
// reported through `status` and the result may be null in that case -- confirm
// against the implementation before relying on it.
U_I18N_API const UnicodeSet* get(Key key, UErrorCode& status);
}
// Parser class (private)
class Parser : public UMemory {
public:
virtual ~Parser();
private:
friend class MessageFormatter;
void parse(UParseError&, UErrorCode&);
/*
Use an internal "parse error" structure to make it easier to translate
absolute offsets to line offsets.
This is translated back to a `UParseError` at the end of parsing.
*/
typedef struct MessageParseError {
// The line on which the error occurred
uint32_t line;
// The offset, relative to the erroneous line, on which the error occurred
uint32_t offset;
// The total number of characters seen before advancing to the current line. It has a value of 0 if line == 0.
// It includes newline characters, because the index does too.
uint32_t lengthBeforeCurrentLine;
// This parser doesn't yet use the last two fields.
UChar preContext[U_PARSE_CONTEXT_LEN];
UChar postContext[U_PARSE_CONTEXT_LEN];
} MessageParseError;
Parser(const UnicodeString &input,
MFDataModel::Builder& dataModelBuilder,
StaticErrors& e,
UnicodeString& normalizedInputRef,
UErrorCode& status)
: contentChars(unisets::get(unisets::CONTENT, status)),
whitespaceChars(unisets::get(unisets::WHITESPACE, status)),
bidiControlChars(unisets::get(unisets::BIDI, status)),
alphaChars(unisets::get(unisets::ALPHA, status)),
digitChars(unisets::get(unisets::DIGIT, status)),
nameStartChars(unisets::get(unisets::NAME_START, status)),
nameChars(unisets::get(unisets::NAME_CHAR, status)),
textChars(unisets::get(unisets::TEXT, status)),
quotedChars(unisets::get(unisets::QUOTED, status)),
escapableChars(unisets::get(unisets::ESCAPABLE, status)),
source(input), index(0), errors(e), normalizedInput(normalizedInputRef), dataModel(dataModelBuilder) {
(void) status;
parseError.line = 0;
parseError.offset = 0;
parseError.lengthBeforeCurrentLine = 0;
parseError.preContext[0] = '\0';
parseError.postContext[0] = '\0';
}
bool isContentChar(UChar32) const;
bool isBidiControl(UChar32) const;
bool isWhitespace(UChar32) const;
bool isTextChar(UChar32) const;
bool isQuotedChar(UChar32) const;
bool isEscapableChar(UChar32) const;
bool isAlpha(UChar32) const;
bool isDigit(UChar32) const;
bool isNameStart(UChar32) const;
bool isNameChar(UChar32) const;
bool isUnquotedStart(UChar32) const;
bool isLiteralStart(UChar32) const;
bool isKeyStart(UChar32) const;
static void translateParseError(const MessageParseError&, UParseError&);
static void setParseError(MessageParseError&, uint32_t);
void maybeAdvanceLine();
Pattern parseSimpleMessage(UErrorCode&);
void parseBody(UErrorCode&);
void parseDeclarations(UErrorCode&);
void parseUnsupportedStatement(UErrorCode&);
void parseLocalDeclaration(UErrorCode&);
void parseInputDeclaration(UErrorCode&);
void parseSelectors(UErrorCode&);
void parseVariant(UErrorCode&);
void parseRequiredWS(UErrorCode&);
void parseRequiredWhitespace(UErrorCode&);
void parseOptionalBidi();
void parseOptionalWhitespace();
void parseToken(UChar32, UErrorCode&);
void parseTokenWithWhitespace(UChar32, UErrorCode&);
void parseToken(const std::u16string_view&, UErrorCode&);
void parseTokenWithWhitespace(const std::u16string_view&, UErrorCode&);
bool nextIs(const std::u16string_view&) const;
UnicodeString parseNameChars(UnicodeString&, UErrorCode&);
UnicodeString parseName(UErrorCode&);
UnicodeString parseIdentifier(UErrorCode&);
UnicodeString parseDigits(UErrorCode&);
VariableName parseVariableName(UErrorCode&);
FunctionName parseFunction(UErrorCode&);
UnicodeString parseEscapeSequence(UErrorCode&);
Literal parseUnquotedLiteral(UErrorCode&);
Literal parseQuotedLiteral(UErrorCode&);
Literal parseLiteral(UErrorCode&);
template<class T>
void parseAttribute(AttributeAdder<T>&, UErrorCode&);
template<class T>
void parseAttributes(AttributeAdder<T>&, UErrorCode&);
template<class T>
void parseOption(OptionAdder<T>&, UErrorCode&);
template<class T>
void parseOptions(OptionAdder<T>&, UErrorCode&);
Operator parseAnnotation(UErrorCode&);
void parseLiteralOrVariableWithAnnotation(bool, Expression::Builder&, UErrorCode&);
Markup parseMarkup(UErrorCode&);
Expression parseExpression(UErrorCode&);
std::variant<Expression, Markup> parsePlaceholder(UErrorCode&);
UnicodeString parseTextChar(UErrorCode&);
Key parseKey(UErrorCode&);
SelectorKeys parseNonEmptyKeys(UErrorCode&);
void errorPattern(UErrorCode& status);
Pattern parseQuotedPattern(UErrorCode&);
bool isDeclarationStart();
UChar32 peek() const { return source.char32At(index) ; }
UChar32 peek(uint32_t i) const {
return source.char32At(source.moveIndex32(index, i));
}
void next() { index = source.moveIndex32(index, 1); }
bool inBounds() const { return (int32_t) index < source.length(); }
bool inBounds(uint32_t i) const { return source.moveIndex32(index, i) < source.length(); }
bool allConsumed() const { return (int32_t) index == source.length(); }
// UnicodeSets for checking character ranges
const UnicodeSet* contentChars;
const UnicodeSet* whitespaceChars;
const UnicodeSet* bidiControlChars;
const UnicodeSet* alphaChars;
const UnicodeSet* digitChars;
const UnicodeSet* nameStartChars;
const UnicodeSet* nameChars;
const UnicodeSet* textChars;
const UnicodeSet* quotedChars;
const UnicodeSet* escapableChars;
// The input string
const UnicodeString &source;
// The current position within the input string -- counting in UChar32
uint32_t index;
// Represents the current line (and when an error is indicated),
// character offset within the line of the parse error
MessageParseError parseError;
// The structure to use for recording errors
StaticErrors& errors;
// Normalized version of the input string (optional whitespace removed)
UnicodeString& normalizedInput;
// The parent builder
MFDataModel::Builder &dataModel;
}; // class Parser
} // namespace message2
U_NAMESPACE_END
#endif /* #if !UCONFIG_NO_MF2 */
#endif /* #if !UCONFIG_NO_FORMATTING */
#endif /* #if !UCONFIG_NO_NORMALIZATION */
#endif /* U_SHOW_CPLUSPLUS_API */
#endif // MESSAGEFORMAT_PARSER_H
#endif // U_HIDE_DEPRECATED_API
// eof
|