1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442
|
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
* Copyright (C) 2007-2016, International Business Machines Corporation and
* others. All Rights Reserved.
*******************************************************************************
*
* File PLURRULE_IMPL.H
*
*******************************************************************************
*/
#ifndef PLURRULE_IMPL
#define PLURRULE_IMPL
// Internal definitions for the PluralRules implementation.
#include <_foundation_unicode/utypes.h>
#if !UCONFIG_NO_FORMATTING
#include <_foundation_unicode/format.h>
#include <_foundation_unicode/locid.h>
#include <_foundation_unicode/parseerr.h>
#include <_foundation_unicode/strenum.h>
#include <_foundation_unicode/ures.h>
#include "uvector.h"
#include "hash.h"
#include "uassert.h"
/**
* A FixedDecimal version of UPLRULES_NO_UNIQUE_VALUE used in PluralRulesTest
* for parsing of samples.
*/
#define UPLRULES_NO_UNIQUE_VALUE_DECIMAL(ERROR_CODE) (DecimalQuantity::fromExponentString(u"-0.00123456777", ERROR_CODE))
class PluralRulesTest;
U_NAMESPACE_BEGIN
class AndConstraint;
class RuleChain;
class DigitInterval;
class PluralRules;
class VisibleDigits;
namespace pluralimpl {
// TODO: Remove this and replace with u"" literals. Was for EBCDIC compatibility.
static const char16_t DOT = ((char16_t) 0x002E);
static const char16_t SINGLE_QUOTE = ((char16_t) 0x0027);
static const char16_t SLASH = ((char16_t) 0x002F);
static const char16_t BACKSLASH = ((char16_t) 0x005C);
static const char16_t SPACE = ((char16_t) 0x0020);
static const char16_t EXCLAMATION = ((char16_t) 0x0021);
static const char16_t QUOTATION_MARK = ((char16_t) 0x0022);
static const char16_t NUMBER_SIGN = ((char16_t) 0x0023);
static const char16_t PERCENT_SIGN = ((char16_t) 0x0025);
static const char16_t ASTERISK = ((char16_t) 0x002A);
static const char16_t COMMA = ((char16_t) 0x002C);
static const char16_t HYPHEN = ((char16_t) 0x002D);
static const char16_t U_ZERO = ((char16_t) 0x0030);
static const char16_t U_ONE = ((char16_t) 0x0031);
static const char16_t U_TWO = ((char16_t) 0x0032);
static const char16_t U_THREE = ((char16_t) 0x0033);
static const char16_t U_FOUR = ((char16_t) 0x0034);
static const char16_t U_FIVE = ((char16_t) 0x0035);
static const char16_t U_SIX = ((char16_t) 0x0036);
static const char16_t U_SEVEN = ((char16_t) 0x0037);
static const char16_t U_EIGHT = ((char16_t) 0x0038);
static const char16_t U_NINE = ((char16_t) 0x0039);
static const char16_t COLON = ((char16_t) 0x003A);
static const char16_t SEMI_COLON = ((char16_t) 0x003B);
static const char16_t EQUALS = ((char16_t) 0x003D);
static const char16_t AT = ((char16_t) 0x0040);
static const char16_t CAP_A = ((char16_t) 0x0041);
static const char16_t CAP_B = ((char16_t) 0x0042);
static const char16_t CAP_R = ((char16_t) 0x0052);
static const char16_t CAP_Z = ((char16_t) 0x005A);
static const char16_t LOWLINE = ((char16_t) 0x005F);
static const char16_t LEFTBRACE = ((char16_t) 0x007B);
static const char16_t RIGHTBRACE = ((char16_t) 0x007D);
static const char16_t TILDE = ((char16_t) 0x007E);
static const char16_t ELLIPSIS = ((char16_t) 0x2026);
static const char16_t LOW_A = ((char16_t) 0x0061);
static const char16_t LOW_B = ((char16_t) 0x0062);
static const char16_t LOW_C = ((char16_t) 0x0063);
static const char16_t LOW_D = ((char16_t) 0x0064);
static const char16_t LOW_E = ((char16_t) 0x0065);
static const char16_t LOW_F = ((char16_t) 0x0066);
static const char16_t LOW_G = ((char16_t) 0x0067);
static const char16_t LOW_H = ((char16_t) 0x0068);
static const char16_t LOW_I = ((char16_t) 0x0069);
static const char16_t LOW_J = ((char16_t) 0x006a);
static const char16_t LOW_K = ((char16_t) 0x006B);
static const char16_t LOW_L = ((char16_t) 0x006C);
static const char16_t LOW_M = ((char16_t) 0x006D);
static const char16_t LOW_N = ((char16_t) 0x006E);
static const char16_t LOW_O = ((char16_t) 0x006F);
static const char16_t LOW_P = ((char16_t) 0x0070);
static const char16_t LOW_Q = ((char16_t) 0x0071);
static const char16_t LOW_R = ((char16_t) 0x0072);
static const char16_t LOW_S = ((char16_t) 0x0073);
static const char16_t LOW_T = ((char16_t) 0x0074);
static const char16_t LOW_U = ((char16_t) 0x0075);
static const char16_t LOW_V = ((char16_t) 0x0076);
static const char16_t LOW_W = ((char16_t) 0x0077);
static const char16_t LOW_Y = ((char16_t) 0x0079);
static const char16_t LOW_Z = ((char16_t) 0x007A);
}
static const int32_t PLURAL_RANGE_HIGH = 0x7fffffff;
enum tokenType {
none,
tNumber,
tComma,
tSemiColon,
tSpace,
tColon,
tAt, // '@'
tDot,
tDot2,
tEllipsis,
tKeyword,
tAnd,
tOr,
tMod, // 'mod' or '%'
tNot, // 'not' only.
tIn, // 'in' only.
tEqual, // '=' only.
tNotEqual, // '!='
tTilde,
tWithin,
tIs,
tVariableN,
tVariableI,
tVariableF,
tVariableV,
tVariableT,
tVariableE,
tVariableC,
tDecimal,
tInteger,
tEOF
};
class PluralRuleParser: public UMemory {
public:
PluralRuleParser();
virtual ~PluralRuleParser();
void parse(const UnicodeString &rules, PluralRules *dest, UErrorCode &status);
void getNextToken(UErrorCode &status);
void checkSyntax(UErrorCode &status);
static int32_t getNumberValue(const UnicodeString &token);
private:
static tokenType getKeyType(const UnicodeString& token, tokenType type);
static tokenType charType(char16_t ch);
static UBool isValidKeyword(const UnicodeString& token);
const UnicodeString *ruleSrc; // The rules string.
int32_t ruleIndex; // String index in the input rules, the current parse position.
UnicodeString token; // Token most recently scanned.
tokenType type;
tokenType prevType;
// The items currently being parsed & built.
// Note: currentChain may not be the last RuleChain in the
// list because the "other" chain is forced to the end.
AndConstraint *curAndConstraint;
RuleChain *currentChain;
int32_t rangeLowIdx; // Indices in the UVector of ranges of the
int32_t rangeHiIdx; // low and hi values currently being parsed.
enum EParseState {
kKeyword,
kExpr,
kValue,
kRangeList,
kSamples
};
};
enum PluralOperand {
/**
* The double value of the entire number.
*/
PLURAL_OPERAND_N,
/**
* The integer value, with the fraction digits truncated off.
*/
PLURAL_OPERAND_I,
/**
* All visible fraction digits as an integer, including trailing zeros.
*/
PLURAL_OPERAND_F,
/**
* Visible fraction digits as an integer, not including trailing zeros.
*/
PLURAL_OPERAND_T,
/**
* Number of visible fraction digits.
*/
PLURAL_OPERAND_V,
/**
* Number of visible fraction digits, not including trailing zeros.
*/
PLURAL_OPERAND_W,
/**
* Suppressed exponent for scientific notation (exponent needed in
* scientific notation to approximate i).
*/
PLURAL_OPERAND_E,
/**
* This operand is currently treated as an alias for `PLURAL_OPERAND_E`.
* In the future, it will represent:
*
* Suppressed exponent for compact notation (exponent needed in
* compact notation to approximate i).
*/
PLURAL_OPERAND_C,
/**
* THIS OPERAND IS DEPRECATED AND HAS BEEN REMOVED FROM THE SPEC.
*
* <p>Returns the integer value, but will fail if the number has fraction digits.
* That is, using "j" instead of "i" is like implicitly adding "v is 0".
*
* <p>For example, "j is 3" is equivalent to "i is 3 and v is 0": it matches
* "3" but not "3.1" or "3.0".
*/
PLURAL_OPERAND_J
};
/**
* Converts from the tokenType enum to PluralOperand. Asserts that the given
* tokenType can be mapped to a PluralOperand.
*/
PluralOperand tokenTypeToPluralOperand(tokenType tt);
/**
* An interface to FixedDecimal, allowing for other implementations.
* @internal
*/
class U_I18N_API IFixedDecimal {
public:
virtual ~IFixedDecimal();
/**
* Returns the value corresponding to the specified operand (n, i, f, t, v, or w).
* If the operand is 'n', returns a double; otherwise, returns an integer.
*/
virtual double getPluralOperand(PluralOperand operand) const = 0;
virtual bool isNaN() const = 0;
virtual bool isInfinite() const = 0;
/** Whether the number has no nonzero fraction digits. */
virtual bool hasIntegerValue() const = 0;
};
/**
* class FixedDecimal serves to communicate the properties
* of a formatted number from a decimal formatter to PluralRules::select()
*
* see DecimalFormat::getFixedDecimal()
* @internal
*/
class U_I18N_API FixedDecimal: public IFixedDecimal, public UObject {
public:
/**
* @param n the number, e.g. 12.345
* @param v The number of visible fraction digits, e.g. 3
* @param f The fraction digits, e.g. 345
* @param e The exponent, e.g. 7 in 1.2e7, for scientific notation
* @param c Currently: an alias for param `e`.
*/
FixedDecimal(double n, int32_t v, int64_t f, int32_t e, int32_t c);
FixedDecimal(double n, int32_t v, int64_t f, int32_t e);
FixedDecimal(double n, int32_t v, int64_t f);
FixedDecimal(double n, int32_t);
explicit FixedDecimal(double n);
FixedDecimal();
~FixedDecimal() override;
FixedDecimal(const UnicodeString &s, UErrorCode &ec);
FixedDecimal(const FixedDecimal &other);
static FixedDecimal createWithExponent(double n, int32_t v, int32_t e);
double getPluralOperand(PluralOperand operand) const override;
bool isNaN() const override;
bool isInfinite() const override;
bool hasIntegerValue() const override;
bool isNanOrInfinity() const; // used in decimfmtimpl.cpp
int32_t getVisibleFractionDigitCount() const;
void init(double n, int32_t v, int64_t f, int32_t e, int32_t c);
void init(double n, int32_t v, int64_t f, int32_t e);
void init(double n, int32_t v, int64_t f);
void init(double n);
UBool quickInit(double n); // Try a fast-path only initialization,
// return true if successful.
void adjustForMinFractionDigits(int32_t min);
static int64_t getFractionalDigits(double n, int32_t v);
static int32_t decimals(double n);
FixedDecimal& operator=(const FixedDecimal& other) = default;
bool operator==(const FixedDecimal &other) const;
UnicodeString toString() const;
double doubleValue() const;
int64_t longValue() const;
double source;
int32_t visibleDecimalDigitCount;
int64_t decimalDigits;
int64_t decimalDigitsWithoutTrailingZeros;
int64_t intValue;
int32_t exponent;
UBool _hasIntegerValue;
UBool isNegative;
UBool _isNaN;
UBool _isInfinite;
};
class AndConstraint : public UMemory {
public:
typedef enum RuleOp {
NONE,
MOD
} RuleOp;
RuleOp op = AndConstraint::NONE;
int32_t opNum = -1; // for mod expressions, the right operand of the mod.
int32_t value = -1; // valid for 'is' rules only.
UVector32 *rangeList = nullptr; // for 'in', 'within' rules. Null otherwise.
UBool negated = false; // true for negated rules.
UBool integerOnly = false; // true for 'within' rules.
tokenType digitsType = none; // n | i | v | f constraint.
AndConstraint *next = nullptr;
// Internal error status, used for errors that occur during the copy constructor.
UErrorCode fInternalStatus = U_ZERO_ERROR;
AndConstraint() = default;
AndConstraint(const AndConstraint& other);
virtual ~AndConstraint();
AndConstraint* add(UErrorCode& status);
// UBool isFulfilled(double number);
UBool isFulfilled(const IFixedDecimal &number);
};
class OrConstraint : public UMemory {
public:
AndConstraint *childNode = nullptr;
OrConstraint *next = nullptr;
// Internal error status, used for errors that occur during the copy constructor.
UErrorCode fInternalStatus = U_ZERO_ERROR;
OrConstraint() = default;
OrConstraint(const OrConstraint& other);
virtual ~OrConstraint();
AndConstraint* add(UErrorCode& status);
// UBool isFulfilled(double number);
UBool isFulfilled(const IFixedDecimal &number);
};
class RuleChain : public UMemory {
public:
UnicodeString fKeyword;
RuleChain *fNext = nullptr;
OrConstraint *ruleHeader = nullptr;
UnicodeString fDecimalSamples; // Samples strings from rule source
UnicodeString fIntegerSamples; // without @decimal or @integer, otherwise unprocessed.
UBool fDecimalSamplesUnbounded = false;
UBool fIntegerSamplesUnbounded = false;
// Internal error status, used for errors that occur during the copy constructor.
UErrorCode fInternalStatus = U_ZERO_ERROR;
RuleChain() = default;
RuleChain(const RuleChain& other);
virtual ~RuleChain();
UnicodeString select(const IFixedDecimal &number) const;
void dumpRules(UnicodeString& result);
UErrorCode getKeywords(int32_t maxArraySize, UnicodeString *keywords, int32_t& arraySize) const;
UBool isKeyword(const UnicodeString& keyword) const;
};
class PluralKeywordEnumeration : public StringEnumeration {
public:
PluralKeywordEnumeration(RuleChain *header, UErrorCode& status);
virtual ~PluralKeywordEnumeration();
static UClassID U_EXPORT2 getStaticClassID();
virtual UClassID getDynamicClassID() const override;
virtual const UnicodeString* snext(UErrorCode& status) override;
virtual void reset(UErrorCode& status) override;
virtual int32_t count(UErrorCode& status) const override;
private:
int32_t pos;
UVector fKeywordNames;
};
class U_I18N_API PluralAvailableLocalesEnumeration: public StringEnumeration {
public:
PluralAvailableLocalesEnumeration(UErrorCode &status);
virtual ~PluralAvailableLocalesEnumeration();
virtual const char* next(int32_t *resultLength, UErrorCode& status) override;
virtual void reset(UErrorCode& status) override;
virtual int32_t count(UErrorCode& status) const override;
private:
UErrorCode fOpenStatus;
UResourceBundle *fLocales = nullptr;
UResourceBundle *fRes = nullptr;
};
U_NAMESPACE_END
#endif /* #if !UCONFIG_NO_FORMATTING */
#endif // _PLURRULE_IMPL
//eof
|