1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182
|
// Copyright (C) 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
*
* Copyright (C) 1999-2014, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
* file name: unistr_case.cpp
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:2
*
* created on: 2004aug19
* created by: Markus W. Scherer
*
* Case-mapping functions moved here from unistr.cpp
*/
#include "unicode/utypes.h"
#include "unicode/putil.h"
#include "cstring.h"
#include "cmemory.h"
#include "unicode/ustring.h"
#include "unicode/unistr.h"
#include "unicode/uchar.h"
#include "uelement.h"
#include "ustr_imp.h"
U_NAMESPACE_BEGIN
//========================================
// Read-only implementation
//========================================
/**
 * Case-insensitively compares a range of this string with a range of a
 * UChar array.
 *
 * @param start     Offset of the first code unit of this string to compare;
 *                  adjusted by pinIndices().
 * @param length    Number of code units of this string to compare;
 *                  adjusted by pinIndices().
 * @param srcChars  The text to compare against. NULL is treated as an
 *                  empty string.
 * @param srcStart  Offset into srcChars at which the comparison starts.
 * @param srcLength Number of code units of srcChars to compare. A negative
 *                  value means the text is NUL-terminated.
 * @param options   Comparison options; U_COMPARE_IGNORE_CASE is always
 *                  added before they are handed to u_strcmpFold().
 * @return 0 if the ranges compare equal, otherwise a non-zero value whose
 *         sign tells which side orders first (negative: this string).
 *         Returns -1 if this string is bogus.
 */
int8_t
UnicodeString::doCaseCompare(int32_t start,
                             int32_t length,
                             const UChar *srcChars,
                             int32_t srcStart,
                             int32_t srcLength,
                             uint32_t options) const
{
  // compare illegal string values
  // treat const UChar *srcChars==NULL as an empty string
  if(isBogus()) {
    return -1;
  }

  // pin indices to legal values
  pinIndices(start, length);

  if(srcChars == NULL) {
    srcStart = srcLength = 0;
  }

  // get the correct pointers: from here on both point at the first
  // code unit that takes part in the comparison
  const UChar *chars = getArrayStart();
  chars += start;
  if(srcStart != 0) {
    srcChars += srcStart;
  }

  if(chars != srcChars) {
    UErrorCode errorCode = U_ZERO_ERROR;
    int32_t result = u_strcmpFold(chars, length, srcChars, srcLength,
                                  options|U_COMPARE_IGNORE_CASE, &errorCode);
    if(result != 0) {
      // squeeze the int32_t into an int8_t: keep the sign, force non-zero
      return (int8_t)(result >> 24 | 1);
    }
  } else {
    // Both ranges start at the same address, so only the lengths can differ.
    // get the srcLength if necessary
    if(srcLength < 0) {
      // srcChars was already advanced by srcStart above; adding srcStart
      // a second time would measure from the wrong position and could
      // read beyond the terminating NUL.
      srcLength = u_strlen(srcChars);
    }
    if(length != srcLength) {
      return (int8_t)((length - srcLength) >> 24 | 1);
    }
  }
  return 0;
}
//========================================
// Write implementation
//========================================
/**
 * Applies a case-mapping function to this string in place.
 *
 * The current contents are kept readable (either copied to a local stack
 * array or left in the old heap array), a fresh destination array is forced
 * via cloneArrayIfNeeded(), and stringCaseMapper is run from the old contents
 * into the new array. On U_BUFFER_OVERFLOW_ERROR the destination is
 * reallocated to the length the mapper returned and the mapping is repeated.
 *
 * @param csm              Case-map settings; passed through unchanged as the
 *                         first argument of stringCaseMapper.
 * @param stringCaseMapper The mapping function. It is called as
 *                         (csm, dest, destCapacity, src, srcLength, &errorCode)
 *                         and its return value is used as the new length.
 * @return *this. The string is left untouched if it is empty, not writable,
 *         or if the initial cloneArrayIfNeeded() fails; it is set to bogus
 *         if the mapper ends with a failing error code.
 */
UnicodeString &
UnicodeString::caseMap(const UCaseMap *csm,
UStringCaseMapper *stringCaseMapper) {
if(isEmpty() || !isWritable()) {
// nothing to do
return *this;
}
// We need to allocate a new buffer for the internal string case mapping function.
// This is very similar to how doReplace() keeps the old array pointer
// and deletes the old array itself after it is done.
// In addition, we are forcing cloneArrayIfNeeded() to always allocate a new array.
UChar oldStackBuffer[US_STACKBUF_SIZE];
UChar *oldArray;
int32_t oldLength;
if(fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) {
// copy the stack buffer contents because it will be overwritten
oldArray = oldStackBuffer;
oldLength = getShortLength();
u_memcpy(oldStackBuffer, fUnion.fStackFields.fBuffer, oldLength);
} else {
// not using the internal stack buffer: read the source text directly from the current array
oldArray = getArrayStart();
oldLength = length();
}
// Initial destination capacity: the stack-buffer size for short strings,
// otherwise the old length plus 20 code units of headroom.
int32_t capacity;
if(oldLength <= US_STACKBUF_SIZE) {
capacity = US_STACKBUF_SIZE;
} else {
capacity = oldLength + 20;
}
// bufferToDelete is filled in by cloneArrayIfNeeded(); it is released below,
// only after the mapping loop has finished reading from oldArray.
int32_t *bufferToDelete = 0;
if(!cloneArrayIfNeeded(capacity, capacity, FALSE, &bufferToDelete, TRUE)) {
// could not get a new array: give up without mapping
return *this;
}
// Case-map, and if the result is too long, then reallocate and repeat.
UErrorCode errorCode;
int32_t newLength;
do {
// reset the error code for every attempt
errorCode = U_ZERO_ERROR;
newLength = stringCaseMapper(csm, getArrayStart(), getCapacity(),
oldArray, oldLength, &errorCode);
setLength(newLength);
// On overflow, newLength is used as the capacity for the next attempt.
// NOTE(review): the retry call passes no bufferToDelete argument; confirm that
// cloneArrayIfNeeded() itself releases the intermediate array in that case.
} while(errorCode==U_BUFFER_OVERFLOW_ERROR && cloneArrayIfNeeded(newLength, newLength, FALSE));
// the old contents are no longer needed once mapping is done
if (bufferToDelete) {
uprv_free(bufferToDelete);
}
// any remaining failure (including an overflow whose reallocation failed) makes the string bogus
if(U_FAILURE(errorCode)) {
setToBogus();
}
return *this;
}
/**
 * Case-folds this string in place.
 *
 * Sets up a temporary UCaseMap that carries the case-properties singleton
 * and the caller's options, then delegates to caseMap() using
 * ustrcase_internalFold as the mapping function.
 *
 * @param options Folding options, stored in the temporary UCaseMap.
 * @return Whatever caseMap() returns, i.e. a reference to this string.
 */
UnicodeString &
UnicodeString::foldCase(uint32_t options) {
  UCaseMap foldingMap = UCASEMAP_INITIALIZER;
  foldingMap.options = options;
  foldingMap.csp = ucase_getSingleton();
  return caseMap(&foldingMap, ustrcase_internalFold);
}
U_NAMESPACE_END
// Defined here to reduce dependencies on break iterator
/**
 * Hash function for UnicodeString keys that ignores case.
 *
 * @param key Element whose pointer member refers to a UnicodeString
 *            (or is NULL).
 * @return 0 for a NULL pointer, otherwise the hashCode() of a case-folded
 *         copy of the string.
 */
U_CAPI int32_t U_EXPORT2
uhash_hashCaselessUnicodeString(const UElement key) {
  U_NAMESPACE_USE
  const UnicodeString *source = static_cast<const UnicodeString *>(key.pointer);
  if (source == NULL) {
    return 0;
  }
  // Inefficient; a better way would be to have a hash function in
  // UnicodeString that does case folding on the fly.
  // The key itself must not be modified, so fold a private copy.
  UnicodeString folded(*source);
  folded.foldCase();
  return folded.hashCode();
}
// Defined here to reduce dependencies on break iterator
/**
 * Key comparator for UnicodeString keys that ignores case.
 *
 * @param key1 Element whose pointer member refers to a UnicodeString or is NULL.
 * @param key2 Element whose pointer member refers to a UnicodeString or is NULL.
 * @return TRUE if both pointers are identical (including both NULL) or if
 *         the two strings compare equal under
 *         caseCompare(..., U_FOLD_CASE_DEFAULT); FALSE otherwise, in
 *         particular when exactly one pointer is NULL.
 */
U_CAPI UBool U_EXPORT2
uhash_compareCaselessUnicodeString(const UElement key1, const UElement key2) {
  U_NAMESPACE_USE
  const UnicodeString *lhs = static_cast<const UnicodeString *>(key1.pointer);
  const UnicodeString *rhs = static_cast<const UnicodeString *>(key2.pointer);

  // Same object (or both NULL): equal without looking at the contents.
  if (lhs == rhs) {
    return TRUE;
  }
  // Only compare contents when both strings actually exist.
  if (lhs != NULL && rhs != NULL) {
    return lhs->caseCompare(*rhs, U_FOLD_CASE_DEFAULT) == 0;
  }
  return FALSE;
}
|