1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145
|
// © 2018 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
#include <_foundation_unicode/utypes.h>
#if !UCONFIG_NO_FORMATTING
// Allow implicit conversion from char16_t* to UnicodeString for this file:
// Helpful in toString methods and elsewhere.
#define UNISTR_FROM_STRING_EXPLICIT
#include "numparse_types.h"
#include "string_segment.h"
#include "putilimp.h"
#include <_foundation_unicode/utf16.h>
#include <_foundation_unicode/uniset.h>
U_NAMESPACE_BEGIN
// Creates a segment over `str`.
// The segment initially starts at offset 0 and ends at str.length();
// `ignoreCase` is stored in fFoldCase and later handed to codePointsEqual()
// by the startsWith() / getCommonPrefixLength() comparisons.
StringSegment::StringSegment(const UnicodeString& str, bool ignoreCase)
        : fStr(str),
          fStart(0),
          fEnd(str.length()),
          fFoldCase(ignoreCase) {
}
// Returns the current start offset (fStart) of the segment.
int32_t StringSegment::getOffset() const {
    const int32_t currentStart = fStart;
    return currentStart;
}
// Moves the start of the segment to `start`. The end offset (fEnd) is left
// untouched and no range checking is performed here.
void StringSegment::setOffset(int32_t start) {
    this->fStart = start;
}
// Shifts the start of the segment by `delta` UTF-16 code units.
// `delta` is added as-is; no range checking is performed here.
void StringSegment::adjustOffset(int32_t delta) {
    fStart = fStart + delta;
}
// Advances the start of the segment past the code point found at the current
// start offset.
//
// getCodePoint() reports an unpaired surrogate as -1. Feeding -1 straight into
// U16_LENGTH() yields 2 (the macro compares the value as unsigned), which
// would skip an extra code unit and could push fStart beyond fEnd. An unpaired
// surrogate occupies exactly one code unit, so advance by one in that case.
void StringSegment::adjustOffsetByCodePoint() {
    const auto cp = getCodePoint();  // UChar32; -1 means "unpaired surrogate"
    if (cp < 0) {
        fStart += 1;
    } else {
        fStart += U16_LENGTH(cp);
    }
}
// Sets the end of the segment so that it spans `length` UTF-16 code units
// counted from the current start offset. No range checking is performed here.
void StringSegment::setLength(int32_t length) {
    const int32_t newEnd = fStart + length;
    fEnd = newEnd;
}
void StringSegment::resetLength() {
fEnd = fStr.length();
}
// Returns the number of UTF-16 code units between the segment's start and
// end offsets (fEnd - fStart).
int32_t StringSegment::length() const {
    const int32_t span = fEnd - fStart;
    return span;
}
// Returns the UTF-16 code unit at `index`, where `index` is relative to the
// segment's start offset. The lookup is delegated to fStr.charAt() and is not
// clamped to the segment's end offset.
char16_t StringSegment::charAt(int32_t index) const {
    const int32_t absoluteIndex = index + fStart;
    return fStr.charAt(absoluteIndex);
}
// Returns the code point at `index`, where `index` is relative to the
// segment's start offset. The lookup is delegated to fStr.char32At() and is
// not clamped to the segment's end offset.
UChar32 StringSegment::codePointAt(int32_t index) const {
    const int32_t absoluteIndex = index + fStart;
    return fStr.char32At(absoluteIndex);
}
// Builds a UnicodeString from the code units in [fStart, fEnd) of the
// underlying string, using the (pointer, length) constructor.
UnicodeString StringSegment::toUnicodeString() const {
    const char16_t* const segmentStart = fStr.getBuffer() + fStart;
    const int32_t segmentLength = fEnd - fStart;
    return UnicodeString(segmentStart, segmentLength);
}
// Returns a UnicodeString covering [fStart, fEnd) of the underlying string.
// The readonly-aliasing constructor is used for efficiency, so the result
// points into fStr's buffer rather than holding its own copy of the text.
const UnicodeString StringSegment::toTempUnicodeString() const {
    const char16_t* const segmentStart = fStr.getBuffer() + fStart;
    const int32_t segmentLength = fEnd - fStart;
    // First argument `false`: the aliased text is not declared NUL-terminated.
    return UnicodeString(false, segmentStart, segmentLength);
}
// Returns the code point at the start of the segment.
//  - A non-surrogate code unit is returned as-is.
//  - A lead surrogate that is followed by at least one more code unit inside
//    the segment is decoded through fStr.char32At().
//  - Any other surrogate (a trail surrogate, or a lead surrogate sitting in
//    the segment's last position) yields -1.
UChar32 StringSegment::getCodePoint() const {
    const char16_t first = fStr.charAt(fStart);
    if (!U16_IS_SURROGATE(first)) {
        return first;
    }
    const bool hasFollowingUnit = fStart + 1 < fEnd;
    if (U16_IS_LEAD(first) && hasFollowingUnit) {
        return fStr.char32At(fStart);
    }
    return -1;
}
// Returns whether the code point at the start of the segment equals
// `otherCp`, honoring the segment's case-folding flag (fFoldCase).
bool StringSegment::startsWith(UChar32 otherCp) const {
    const UChar32 first = getCodePoint();
    return codePointsEqual(first, otherCp, fFoldCase);
}
bool StringSegment::startsWith(const UnicodeSet& uniset) const {
// TODO: Move UnicodeSet case-folding logic here.
// TODO: Handle string matches here instead of separately.
UChar32 cp = getCodePoint();
if (cp == -1) {
return false;
}
return uniset.contains(cp);
}
// Returns whether the first code point of the segment equals the first code
// point of `other`, honoring the segment's case-folding flag (fFoldCase).
// Only the leading code points are compared. A bogus or empty `other`, or an
// empty segment, never matches.
bool StringSegment::startsWith(const UnicodeString& other) const {
    const bool comparable = !other.isBogus() && other.length() != 0 && length() != 0;
    if (!comparable) {
        return false;
    }
    return codePointsEqual(getCodePoint(), other.char32At(0), fFoldCase);
}
// Returns the length of the prefix shared by the segment and `other`,
// comparing with the segment's own case-folding setting (fFoldCase).
int32_t StringSegment::getCommonPrefixLength(const UnicodeString& other) {
    const bool foldCase = fFoldCase;
    return getPrefixLengthInternal(other, foldCase);
}
// Returns the length of the prefix shared by the segment and `other`,
// always comparing case-sensitively regardless of fFoldCase.
int32_t StringSegment::getCaseSensitivePrefixLength(const UnicodeString& other) {
    constexpr bool kFoldCase = false;
    return getPrefixLengthInternal(other, kFoldCase);
}
// Returns the number of leading UTF-16 code units that the segment and
// `other` have in common. `other` must be non-empty (asserted).
//
// When `foldCase` is true the comparison is case-insensitive. Case folding is
// applied to whole code points: if both strings hold a complete surrogate pair
// at the current position (and the pair lies inside the compared range), the
// two supplementary code points are compared and the offset advances by two.
// Folding the halves of a pair individually can never match cased letters
// outside the BMP, because a surrogate code unit folds to itself.
//
// In every other situation (BMP characters, unpaired surrogates, or a pair cut
// off by the end of either string) the comparison proceeds one code unit at a
// time, so a segment that ends in the middle of a pair is still reported as a
// prefix of `other`.
int32_t StringSegment::getPrefixLengthInternal(const UnicodeString& other, bool foldCase) {
    U_ASSERT(other.length() > 0);
    const int32_t limit = uprv_min(length(), other.length());
    int32_t offset = 0;
    while (offset < limit) {
        UChar32 cp1 = charAt(offset);
        UChar32 cp2 = other.charAt(offset);
        int32_t width = 1;
        if (offset + 1 < limit && U16_IS_LEAD(cp1) && U16_IS_LEAD(cp2)) {
            const char16_t trail1 = charAt(offset + 1);
            const char16_t trail2 = other.charAt(offset + 1);
            if (U16_IS_TRAIL(trail1) && U16_IS_TRAIL(trail2)) {
                // Both sides have a well-formed pair: compare full code points.
                cp1 = U16_GET_SUPPLEMENTARY(cp1, trail1);
                cp2 = U16_GET_SUPPLEMENTARY(cp2, trail2);
                width = 2;
            }
        }
        if (!codePointsEqual(cp1, cp2, foldCase)) {
            break;
        }
        offset += width;
    }
    return offset;
}
// Returns whether two code points are equal, optionally ignoring case.
//
// cp1, cp2  - the code points to compare.
// foldCase  - when false, only exact equality counts; when true, code points
//             whose simple case foldings are identical also count as equal.
//
// The folding uses U_FOLD_CASE_DEFAULT. The second parameter of u_foldCase()
// is an options bit set, not a boolean: passing `true` would be converted to
// 1, i.e. U_FOLD_CASE_EXCLUDE_SPECIAL_I, which selects the Turkic mappings for
// dotted/dotless I and makes 'I' and 'i' compare as different.
bool StringSegment::codePointsEqual(UChar32 cp1, UChar32 cp2, bool foldCase) {
    if (cp1 == cp2) {
        return true;
    }
    if (!foldCase) {
        return false;
    }
    return u_foldCase(cp1, U_FOLD_CASE_DEFAULT) == u_foldCase(cp2, U_FOLD_CASE_DEFAULT);
}
// Returns whether the text currently covered by the segment equals `other`.
// The comparison is made against the temporary view returned by
// toTempUnicodeString(), using UnicodeString's own operator==; fFoldCase is
// not consulted here.
bool StringSegment::operator==(const UnicodeString& other) const {
    const UnicodeString& segmentView = toTempUnicodeString();
    return segmentView == other;
}
U_NAMESPACE_END
#endif /* #if !UCONFIG_NO_FORMATTING */
|