1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163
|
// © 2018 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
#include <_foundation_unicode/utypes.h>
#if !UCONFIG_NO_FORMATTING
// Allow implicit conversion from char16_t* to UnicodeString for this file:
// Helpful in toString methods and elsewhere.
#define UNISTR_FROM_STRING_EXPLICIT
#include "numparse_types.h"
#include "numparse_scientific.h"
#include "static_unicode_sets.h"
#include "string_segment.h"
using namespace icu;
using namespace icu::numparse;
using namespace icu::numparse::impl;
namespace {

/** Dereferences the UnicodeSet pointer that unisets::get() yields for the MINUS_SIGN key. */
inline const UnicodeSet& minusSignSet() {
    const UnicodeSet* minusSigns = unisets::get(unisets::MINUS_SIGN);
    return *minusSigns;
}

/** Dereferences the UnicodeSet pointer that unisets::get() yields for the PLUS_SIGN key. */
inline const UnicodeSet& plusSignSet() {
    const UnicodeSet* plusSigns = unisets::get(unisets::PLUS_SIGN);
    return *plusSigns;
}

} // namespace
/**
 * Builds a matcher for the exponent part of a number.
 *
 * @param dfs     Source of the exponential, minus sign and plus sign symbols.
 * @param grouper Forwarded unchanged to the nested exponent-digits matcher.
 *
 * The exponent-digits matcher is created with the INTEGER_ONLY and
 * GROUPING_DISABLED parse flags, and the ignorables matcher with
 * STRICT_IGNORABLES.
 */
ScientificMatcher::ScientificMatcher(const DecimalFormatSymbols& dfs, const Grouper& grouper)
        : fExponentSeparatorString(dfs.getConstSymbol(DecimalFormatSymbols::kExponentialSymbol)),
          fExponentMatcher(dfs, grouper, PARSE_FLAG_INTEGER_ONLY | PARSE_FLAG_GROUPING_DISABLED),
          fIgnorablesMatcher(PARSE_FLAG_STRICT_IGNORABLES) {
    // Remember the symbols' minus sign only when the minus sign set does not
    // already contain it; otherwise mark the custom string as bogus.
    const UnicodeString& symbolsMinus = dfs.getConstSymbol(DecimalFormatSymbols::kMinusSignSymbol);
    if (!minusSignSet().contains(symbolsMinus)) {
        fCustomMinusSign = symbolsMinus;
    } else {
        fCustomMinusSign.setToBogus();
    }

    // Same treatment for the plus sign.
    const UnicodeString& symbolsPlus = dfs.getConstSymbol(DecimalFormatSymbols::kPlusSignSymbol);
    if (!plusSignSet().contains(symbolsPlus)) {
        fCustomPlusSign = symbolsPlus;
    } else {
        fCustomPlusSign.setToBogus();
    }
}
/**
 * Tries to consume an exponent (separator, optional sign, digits) at the
 * front of the segment.
 *
 * @param segment Text being parsed. Its offset ends up past its starting
 *                position only when the exponent-digits matcher moved it;
 *                every other path leaves or restores the starting offset.
 * @param result  Parse state. FLAG_HAS_EXPONENT is added to its flags when
 *                exponent digits were consumed; quantity.bogus is cleared
 *                only for the duration of the digits match.
 * @param status  Passed through to the ignorables and exponent-digits matchers.
 * @return false when no number has been seen yet, when an exponent was
 *         already recorded, or when the segment does not begin with the
 *         exponent separator. true when the segment runs out part-way through
 *         the separator, after the separator, or around the sign, or when a
 *         custom sign is only partially present. Otherwise the return value
 *         of the exponent-digits matcher.
 */
bool ScientificMatcher::match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const {
    // Scientific notation is only accepted after the mantissa, and only one
    // exponent is accepted per string.
    if (!result.seenNumber() || (result.flags & FLAG_HAS_EXPONENT) != 0) {
        return false;
    }

    // Match the separator first, then another number after it.
    // NOTE: the smoke test guards this call, so the separator length is not re-checked here.
    const int32_t startOffset = segment.getOffset();
    const int32_t separatorOverlap = segment.getCommonPrefixLength(fExponentSeparatorString);

    if (separatorOverlap != fExponentSeparatorString.length()) {
        // The whole remaining segment being a prefix of the separator is a
        // partial separator match; anything else is no match at all.
        return separatorOverlap == segment.length();
    }

    // Full separator match. If nothing follows it, there is no code point to look at.
    if (separatorOverlap == segment.length()) {
        return true;
    }
    segment.adjustOffset(separatorOverlap);

    // Ignorables may precede the sign. The length check above guards this call.
    // Note (from the original author): the ignorables matcher should not touch the result.
    fIgnorablesMatcher.match(segment, result, status);
    if (segment.length() == 0) {
        segment.setOffset(startOffset);
        return true;
    }

    // Optional sign. The static sign sets are tried before the custom sign strings,
    // and minus is tried before plus within each group.
    int8_t signOfExponent = 1;
    if (segment.startsWith(minusSignSet())) {
        segment.adjustOffsetByCodePoint();
        signOfExponent = -1;
    } else if (segment.startsWith(plusSignSet())) {
        segment.adjustOffsetByCodePoint();
    } else {
        const bool isCustomMinus = segment.startsWith(fCustomMinusSign);
        if (isCustomMinus || segment.startsWith(fCustomPlusSign)) {
            const UnicodeString& customSign = isCustomMinus ? fCustomMinusSign : fCustomPlusSign;
            const int32_t signOverlap = segment.getCommonPrefixLength(customSign);
            if (signOverlap != customSign.length()) {
                // Partial custom sign match.
                segment.setOffset(startOffset);
                return true;
            }
            if (isCustomMinus) {
                signOfExponent = -1;
            }
            segment.adjustOffset(signOverlap);
        }
    }

    // Nothing left after the sign.
    if (segment.length() == 0) {
        segment.setOffset(startOffset);
        return true;
    }

    // Ignorables may also follow the sign. The length check above guards this call.
    // Note (from the original author): the ignorables matcher should not touch the result.
    fIgnorablesMatcher.match(segment, result, status);
    if (segment.length() == 0) {
        segment.setOffset(startOffset);
        return true;
    }

    // E0 is supposed to be accepted after NaN, so result.quantity has to be
    // available: clear the bogus flag for the digits match, then put it back.
    const bool savedBogus = result.quantity.bogus;
    result.quantity.bogus = false;
    const int32_t offsetBeforeDigits = segment.getOffset();
    const bool digitsMatched = fExponentMatcher.match(segment, result, signOfExponent, status);
    result.quantity.bogus = savedBogus;

    if (segment.getOffset() == offsetBeforeDigits) {
        // No exponent digits were matched: undo the separator and sign consumption.
        segment.setOffset(startOffset);
    } else {
        // At least one exponent digit was matched.
        result.flags |= FLAG_HAS_EXPONENT;
    }
    return digitsMatched;
}
/**
 * Cheap pre-check: reports whether the segment starts with the exponent
 * separator string, as decided by StringSegment::startsWith().
 */
bool ScientificMatcher::smokeTest(const StringSegment& segment) const {
    const bool beginsWithSeparator = segment.startsWith(fExponentSeparatorString);
    return beginsWithSeparator;
}
/** Returns the fixed label "<Scientific>" identifying this matcher. */
UnicodeString ScientificMatcher::toString() const {
    return UnicodeString(u"<Scientific>");
}
#endif /* #if !UCONFIG_NO_FORMATTING */
|