1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325
|
// © 2024 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
#include "unicode/utypes.h"
#if !UCONFIG_NO_NORMALIZATION
#if !UCONFIG_NO_FORMATTING
#if !UCONFIG_NO_MF2
#include "unicode/messageformat2.h"
#include "messageformat2_allocation.h"
#include "messageformat2_checker.h"
#include "messageformat2_evaluation.h"
#include "messageformat2_function_registry_internal.h"
#include "messageformat2_macros.h"
#include "uvector.h" // U_ASSERT
U_NAMESPACE_BEGIN
namespace message2 {
/*
Checks data model errors
(see https://github.com/unicode-org/message-format-wg/blob/main/spec/formatting.md#error-handling )
The following are checked here:
Variant Key Mismatch
Duplicate Variant
Missing Fallback Variant (called NonexhaustivePattern here)
Missing Selector Annotation
Duplicate Declaration
- Most duplicate declaration errors are checked by the parser,
but the checker checks for declarations of input variables
that were previously implicitly declared
(Duplicate option names and duplicate declarations are checked by the parser)
*/
// Type environments
// -----------------
// Creates the three variable-name vectors backing a type environment:
// `annotated`, `unannotated` and `freeVars`.
// Each vector is created first, `status` is checked, and only then is the
// vector adopted by its member.
// NOTE(review): CHECK_ERROR is assumed to return early on a failing `status`,
// which would leave the not-yet-adopted members unset -- confirm against
// messageformat2_macros.h.
TypeEnvironment::TypeEnvironment(UErrorCode& status) {
    CHECK_ERROR(status);

    UVector* annotatedVars = createStringVectorNoAdopt(status);
    CHECK_ERROR(status);
    annotated.adoptInstead(annotatedVars);

    UVector* unannotatedVars = createStringVectorNoAdopt(status);
    CHECK_ERROR(status);
    unannotated.adoptInstead(unannotatedVars);

    UVector* freeVariables = createStringVectorNoAdopt(status);
    CHECK_ERROR(status);
    freeVars.adoptInstead(freeVariables);
}
// Returns whether `v` reports that it contains `var`.
// The address of `var` is handed to UVector::contains() as an untyped,
// non-const pointer; it is only used for the lookup here.
static bool has(const UVector& v, const VariableName& var) {
    const void* lookupKey = &var;
    return v.contains(const_cast<void*>(lookupKey));
}
// Returns true if `var` is present in any of the three sets, i.e. it was
// either previously used (implicit declaration, recorded in `freeVars`)
// or is in scope by an explicit declaration (`annotated` / `unannotated`).
// The sets are consulted in the order annotated, unannotated, freeVars.
bool TypeEnvironment::known(const VariableName& var) const {
    if (has(*annotated, var)) {
        return true;
    }
    if (has(*unannotated, var)) {
        return true;
    }
    return has(*freeVars, var);
}
// Looks up the type recorded for `var`.
// Returns Annotated if `var` is in `annotated`, FreeVariable if it is only in
// `freeVars`, and Unannotated otherwise.
TypeEnvironment::Type TypeEnvironment::get(const VariableName& var) const {
    U_ASSERT(annotated.isValid());
    if (has(*annotated, var)) {
        return Annotated;
    }

    U_ASSERT(unannotated.isValid());
    if (!has(*unannotated, var)) {
        // Not explicitly declared; it may still have been implicitly declared
        U_ASSERT(freeVars.isValid());
        if (has(*freeVars, var)) {
            return FreeVariable;
        }
    }

    // Either `var` was declared without an annotation, or it is a
    // "free variable without an implicit declaration", i.e. one used only
    // in a selector expression and not in a declaration RHS.
    return Unannotated;
}
// Records `var` in the set that corresponds to type `t`:
// Unannotated -> `unannotated`, FreeVariable -> `freeVars`,
// anything else -> `annotated`.
// Any failure from adding to the vector is reported through `status`.
void TypeEnvironment::extend(const VariableName& var, TypeEnvironment::Type t, UErrorCode& status) {
    // The vectors store the address of `var` rather than a copy.
    // Casting away const is safe because elements of these vectors are never
    // written and the lifetime of `var` is guaranteed to include the lifetime
    // of the vectors.
    void* entry = const_cast<void*>(static_cast<const void*>(&var));

    switch (t) {
    case Unannotated:
        U_ASSERT(unannotated.isValid());
        unannotated->addElement(entry, status);
        break;
    case FreeVariable:
        U_ASSERT(freeVars.isValid());
        freeVars->addElement(entry, status);
        break;
    default:
        // Every remaining type is treated as annotated
        U_ASSERT(annotated.isValid());
        annotated->addElement(entry, status);
        break;
    }
}
// No explicit cleanup is performed here; the destructor is defined out of line only.
TypeEnvironment::~TypeEnvironment() = default;
// ---------------------
// Returns a copy of key `k` suitable for comparison.
// A wildcard key is returned as-is. A literal key is rebuilt with the same
// quoted flag and with its unquoted contents passed through
// StandardFunctions::normalizeNFC().
Key Checker::normalizeNFC(const Key& k) const {
    if (!k.isWildcard()) {
        const Literal& lit = k.asLiteral();
        return Key(Literal(lit.isQuoted(),
                           StandardFunctions::normalizeNFC(lit.unquoted())));
    }
    return k;
}
static bool areDefaultKeys(const Key* keys, int32_t len) {
U_ASSERT(len > 0);
for (int32_t i = 0; i < len; i++) {
if (!keys[i].isWildcard()) {
return false;
}
}
return true;
}
// Operand overload: if `rand` is a variable that the environment `t` does not
// know yet, records it in `t` as a FreeVariable. Literals and already-known
// variables leave `t` untouched.
void Checker::addFreeVars(TypeEnvironment& t, const Operand& rand, UErrorCode& status) {
    CHECK_ERROR(status);

    if (!rand.isVariable()) {
        return;
    }
    const VariableName& name = rand.asVariable();
    if (t.known(name)) {
        return;
    }
    t.extend(name, TypeEnvironment::Type::FreeVariable, status);
}
// OptionMap overload: visits every option in `opts` and records any
// not-yet-known variable used as an option value as a free variable in `t`.
// Does nothing if `status` already indicates a failure on entry, like the
// other addFreeVars overloads; stops at the first option that cannot be
// retrieved.
void Checker::addFreeVars(TypeEnvironment& t, const OptionMap& opts, UErrorCode& status) {
    CHECK_ERROR(status);

    for (int32_t i = 0; i < opts.size(); i++) {
        const Option& o = opts.getOption(i, status);
        // Don't look at `o` unless it was retrieved successfully
        CHECK_ERROR(status);
        addFreeVars(t, o.getValue(), status);
    }
}
// Operator overload: records the free variables that appear in the
// operator's options by delegating to the OptionMap overload.
void Checker::addFreeVars(TypeEnvironment& t, const Operator& rator, UErrorCode& status) {
    CHECK_ERROR(status);

    const OptionMap& options = rator.getOptionsInternal();
    addFreeVars(t, options, status);
}
// Expression overload: records the free variables used by `rhs`.
// For a function call the operator (and thus its options) is processed first;
// the operand is processed afterwards in every case.
void Checker::addFreeVars(TypeEnvironment& t, const Expression& rhs, UErrorCode& status) {
    CHECK_ERROR(status);

    const bool isCall = rhs.isFunctionCall();
    if (isCall) {
        const Operator* op = rhs.getOperator(status);
        // A function call is expected to always provide its operator
        U_ASSERT(U_SUCCESS(status));
        addFreeVars(t, *op, status);
    }

    const Operand& operand = rhs.getOperand();
    addFreeVars(t, operand, status);
}
// Checks the variants of a selectors message (must not be called for a
// pattern message) and records the following static errors in `errors`:
//  - VariantKeyMismatchError: some variant's key count differs from the
//    number of selectors. Checking stops immediately in this case.
//  - DuplicateVariant: two variants have key lists that are equal after
//    NFC normalization. Reported at most once.
//  - NonexhaustivePattern: no variant consists solely of wildcard keys.
void Checker::checkVariants(UErrorCode& status) {
    CHECK_ERROR(status);

    U_ASSERT(!dataModel.hasPattern());

    const Variant* variants = dataModel.getVariantsInternal();

    // Becomes true once a variant that includes only wildcards is seen
    bool defaultExists = false;
    // Becomes true once two variants with equal key lists are seen
    bool duplicatesExist = false;

    for (int32_t i = 0; i < dataModel.numVariants(); i++) {
        const SelectorKeys& k = variants[i].getKeys();
        const Key* keys = k.getKeysInternal();
        int32_t len = k.len;
        // Check that this variant has a key list with size
        // equal to the number of selectors
        if (len != dataModel.numSelectors()) {
            // Variant key mismatch
            errors.addError(StaticErrorType::VariantKeyMismatchError, status);
            return;
        }
        defaultExists |= areDefaultKeys(keys, len);

        // Check if this variant's keys are duplicated by any earlier variant's keys.
        // Skipped entirely once a duplicate has been found, since the error
        // is only reported once.
        if (!duplicatesExist) {
            // This check takes quadratic time, but it can be optimized if checking
            // this property turns out to be a bottleneck.
            for (int32_t j = 0; j < i; j++) {
                const SelectorKeys& k1 = variants[j].getKeys();
                const Key* keys1 = k1.getKeysInternal();
                bool allEqual = true;
                // This variant was already checked,
                // so we know keys1.len == len
                for (int32_t kk = 0; kk < len; kk++) {
                    if (!(normalizeNFC(keys[kk]) == normalizeNFC(keys1[kk]))) {
                        allEqual = false;
                        break;
                    }
                }
                if (allEqual) {
                    duplicatesExist = true;
                    // One duplicate is enough; comparing against the remaining
                    // earlier variants could not change the outcome
                    break;
                }
            }
        }
    }

    if (duplicatesExist) {
        errors.addError(StaticErrorType::DuplicateVariant, status);
    }
    if (!defaultExists) {
        errors.addError(StaticErrorType::NonexhaustivePattern, status);
    }
}
void Checker::requireAnnotated(const TypeEnvironment& t,
const VariableName& selectorVar,
UErrorCode& status) {
CHECK_ERROR(status);
if (t.get(selectorVar) == TypeEnvironment::Type::Annotated) {
return; // No error
}
// If this code is reached, an error was detected
errors.addError(StaticErrorType::MissingSelectorAnnotation, status);
}
void Checker::checkSelectors(const TypeEnvironment& t, UErrorCode& status) {
U_ASSERT(!dataModel.hasPattern());
// Check each selector; if it's not annotated, emit a
// "missing selector annotation" error
const VariableName* selectors = dataModel.getSelectorsInternal();
for (int32_t i = 0; i < dataModel.numSelectors(); i++) {
requireAnnotated(t, selectors[i], status);
}
}
// Computes the type of `expr` for the purposes of annotation checking:
//  - a function call is Annotated;
//  - a bare literal is Unannotated;
//  - a bare variable has whatever type `t` records for it.
// The operand of a non-call expression is asserted to be non-null.
TypeEnvironment::Type typeOf(TypeEnvironment& t, const Expression& expr) {
    if (!expr.isFunctionCall()) {
        const Operand& operand = expr.getOperand();
        U_ASSERT(!operand.isNull());
        if (!operand.isLiteral()) {
            U_ASSERT(operand.isVariable());
            return t.get(operand.asVariable());
        }
        return TypeEnvironment::Type::Unannotated;
    }
    return TypeEnvironment::Type::Annotated;
}
// Processes the declarations of the data model in order, extending the type
// environment `t` with each declared variable and recording a
// DuplicateDeclarationError whenever a declaration's left-hand side names a
// variable that was already implicitly declared by an earlier use.
//
// Only a very simple type system is necessary: variables
// have the type "annotated", "unannotated", or "free".
// For "missing selector annotation" checking, free variables
// (message arguments) are treated as unannotated.
// Free variables are also used for checking duplicate declarations.
void Checker::checkDeclarations(TypeEnvironment& t, UErrorCode& status) {
    CHECK_ERROR(status);

    const Binding* env = dataModel.getLocalVariablesInternal();
    for (int32_t i = 0; i < dataModel.bindingsLen; i++) {
        const Binding& b = env[i];
        const VariableName& lhs = b.getVariable();
        const Expression& rhs = b.getValue();

        // First, add the free variables this declaration uses.
        // This must be done before the duplicate check so we can catch:
        // .local $foo = {$foo}
        // (where the RHS is the first use of $foo)
        if (b.isLocal()) {
            addFreeVars(t, rhs, status);
        } else if (b.hasAnnotation()) {
            // For .input declarations, we just need to add any variables
            // referenced in the options; if b has no annotation,
            // there's nothing to add
            const OptionMap& opts = b.getOptionsInternal();
            addFreeVars(t, opts, status);
        }

        // Next, check if the LHS equals any free variables
        // whose implicit declarations are in scope
        if (t.known(lhs) && t.get(lhs) == TypeEnvironment::Type::FreeVariable) {
            errors.addError(StaticErrorType::DuplicateDeclarationError, status);
        }

        // Finally, extend the type environment with a binding from lhs to its type
        t.extend(lhs, typeOf(t, rhs), status);
    }
}
// Entry point of the checker: builds a fresh type environment, checks all
// declarations, and -- for a selectors message only -- checks the selectors
// and then the variants. A pattern message needs no further checks.
void Checker::check(UErrorCode& status) {
    CHECK_ERROR(status);

    TypeEnvironment typeEnv(status);
    checkDeclarations(typeEnv, status);

    if (!dataModel.hasPattern()) {
        // Selectors message
        checkSelectors(typeEnv, status);
        checkVariants(status);
    }
}
} // namespace message2
U_NAMESPACE_END
#endif /* #if !UCONFIG_NO_MF2 */
#endif /* #if !UCONFIG_NO_FORMATTING */
#endif /* #if !UCONFIG_NO_NORMALIZATION */
|