1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186
|
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
* Copyright (C) 2010-2014, International Business Machines
* Corporation and others. All Rights Reserved.
*******************************************************************************
* utf16collationiterator.h
*
* created on: 2010oct27
* created by: Markus W. Scherer
*/
#ifndef __UTF16COLLATIONITERATOR_H__
#define __UTF16COLLATIONITERATOR_H__
#include <_foundation_unicode/utypes.h>
#if !UCONFIG_NO_COLLATION
#include "cmemory.h"
#include "collation.h"
#include "collationdata.h"
#include "collationiterator.h"
#include "normalizer2impl.h"
U_NAMESPACE_BEGIN
/**
 * Collation element and character iterator over UTF-16 text.
 * Normalized UTF-16 input is processed inline; the text is delimited either
 * by an explicit limit pointer or by a NUL terminator.
 * Text that is not normalized is the job of a subclass.
 */
class U_I18N_API UTF16CollationIterator : public CollationIterator {
public:
    /**
     * Creates an iterator over the text starting at s, positioned at p.
     * @param d       collation data, forwarded to the CollationIterator base
     * @param numeric numeric flag, forwarded to the CollationIterator base
     * @param s       start of the text
     * @param p       initial iteration position
     * @param lim     limit of the text; nullptr for a NUL-terminated string
     */
    UTF16CollationIterator(const CollationData *d, UBool numeric,
                           const char16_t *s, const char16_t *p, const char16_t *lim)
            : CollationIterator(d, numeric),
              start(s),
              pos(p),
              limit(lim) {}

    /**
     * Builds an iterator from another one plus a new text pointer.
     * Defined out of line.
     * NOTE(review): presumably carries the state of `other` over to newText;
     * confirm against the implementation.
     */
    UTF16CollationIterator(const UTF16CollationIterator &other, const char16_t *newText);

    virtual ~UTF16CollationIterator();

    // Overrides of the public CollationIterator interface (defined out of line).
    bool operator==(const CollationIterator &other) const override;
    void resetToOffset(int32_t newOffset) override;
    int32_t getOffset() const override;

    /**
     * Resets the iterator via reset() and then points it at new text,
     * with the position at the beginning of that text.
     * @param s   start of the new text; also becomes the current position
     * @param lim limit of the new text
     */
    void setText(const char16_t *s, const char16_t *lim) {
        reset();
        start = s;
        pos = s;
        limit = lim;
    }

    UChar32 nextCodePoint(UErrorCode &errorCode) override;
    UChar32 previousCodePoint(UErrorCode &errorCode) override;

protected:
    /**
     * Copy constructor reserved for subclasses: the base part is copied,
     * while all three text pointers start out as nullptr and
     * are to be set by the subclass.
     */
    UTF16CollationIterator(const UTF16CollationIterator &other)
            : CollationIterator(other),
              start(nullptr),
              pos(nullptr),
              limit(nullptr) {}

    // Overrides of the protected CollationIterator hooks (defined out of line).
    uint32_t handleNextCE32(UChar32 &c, UErrorCode &errorCode) override;
    char16_t handleGetTrailSurrogate() override;
    UBool foundNULTerminator() override;
    void forwardNumCodePoints(int32_t num, UErrorCode &errorCode) override;
    void backwardNumCodePoints(int32_t num, UErrorCode &errorCode) override;

    // Pointers into the UTF-16 string.
    // For NUL-terminated strings, limit may be nullptr.
    const char16_t *start;
    const char16_t *pos;
    const char16_t *limit;
};
/**
 * UTF-16 collation iterator that checks the input text for FCD
 * incrementally and normalizes it wherever that is necessary.
 */
class U_I18N_API FCDUTF16CollationIterator : public UTF16CollationIterator {
public:
    /**
     * Creates an iterator over the text starting at s, positioned at p,
     * with the incremental FCD check initially running forward (checkDir == 1).
     * @param data    collation data; forwarded to the base class and also the
     *                source of the Normalizer2Impl reference (data->nfcImpl)
     * @param numeric numeric flag, forwarded to the base class
     * @param s       start of the raw input text
     * @param p       initial position; also the initial segmentStart
     * @param lim     limit of the raw input text; nullptr for NUL-terminated text
     */
    FCDUTF16CollationIterator(const CollationData *data, UBool numeric,
                              const char16_t *s, const char16_t *p, const char16_t *lim)
            : UTF16CollationIterator(data, numeric, s, p, lim),
              rawStart(s),
              segmentStart(p),
              segmentLimit(nullptr),
              rawLimit(lim),
              nfcImpl(data->nfcImpl),
              checkDir(1) {}

    /**
     * Builds an iterator from another one plus a new text pointer.
     * Defined out of line.
     * NOTE(review): presumably carries the state of `other` over to newText;
     * confirm against the implementation.
     */
    FCDUTF16CollationIterator(const FCDUTF16CollationIterator &other, const char16_t *newText);

    virtual ~FCDUTF16CollationIterator();

    // Overrides of the public iterator interface (defined out of line).
    bool operator==(const CollationIterator &other) const override;
    void resetToOffset(int32_t newOffset) override;
    int32_t getOffset() const override;
    UChar32 nextCodePoint(UErrorCode &errorCode) override;
    UChar32 previousCodePoint(UErrorCode &errorCode) override;

protected:
    // Overrides of the protected iterator hooks (defined out of line).
    uint32_t handleNextCE32(UChar32 &c, UErrorCode &errorCode) override;
    UBool foundNULTerminator() override;
    void forwardNumCodePoints(int32_t num, UErrorCode &errorCode) override;
    void backwardNumCodePoints(int32_t num, UErrorCode &errorCode) override;

private:
    /**
     * Changes over to forward checking where that is possible.
     * Precondition:  checkDir < 0 || (checkDir == 0 && pos == limit).
     * Postcondition: checkDir > 0 || (checkDir == 0 && pos != limit).
     */
    void switchToForward();

    /**
     * Grows the FCD text segment in the forward direction,
     * or normalizes around pos.
     * Precondition: checkDir > 0 && pos != limit.
     * @return true on success, with checkDir == 0 and pos != limit
     */
    UBool nextSegment(UErrorCode &errorCode);

    /**
     * Changes over to backward checking.
     * Precondition:  checkDir > 0 || (checkDir == 0 && pos == start).
     * Postcondition: checkDir < 0 || (checkDir == 0 && pos != start).
     */
    void switchToBackward();

    /**
     * Grows the FCD text segment in the backward direction,
     * or normalizes around pos.
     * Precondition: checkDir < 0 && pos != start.
     * @return true on success, with checkDir == 0 and pos != start
     */
    UBool previousSegment(UErrorCode &errorCode);

    // NOTE(review): presumably normalizes the text [from, to[ into `normalized`;
    // confirm against the out-of-line definition.
    UBool normalize(const char16_t *from, const char16_t *to, UErrorCode &errorCode);

    // Text pointers.
    // The whole input text is [rawStart, rawLimit[;
    // rawLimit may be nullptr when the text is NUL-terminated.
    //
    // When checkDir > 0:
    //   [segmentStart..pos[ of the input text has passed the FCD check.
    //   The check proceeds incrementally while moving forward.
    //   segmentLimit is undefined, and limit == rawLimit.
    //
    // When checkDir < 0:
    //   [pos..segmentLimit[ of the input text has passed the FCD check.
    //   The check proceeds incrementally while moving backward.
    //   segmentStart is undefined, and start == rawStart.
    //
    // When checkDir == 0:
    //   [segmentStart..segmentLimit[ of the input text is being processed;
    //   both of these pointers are at FCD boundaries.
    //   One of two situations holds:
    //   - the segment already passes the FCD check, and
    //     segmentStart==start<=pos<=limit==segmentLimit; or
    //   - the current segment had to be normalized, so that
    //     [segmentStart..segmentLimit[ turned into the normalized string, with
    //     normalized.getBuffer()==start<=pos<=limit==start+normalized.length().
    const char16_t *rawStart;
    const char16_t *segmentStart;
    const char16_t *segmentLimit;
    // nullptr when the string is NUL-terminated.
    const char16_t *rawLimit;

    const Normalizer2Impl &nfcImpl;
    UnicodeString normalized;
    // Direction of the incremental FCD check; see the text-pointer notes
    // that precede rawStart.
    int8_t checkDir;
};
U_NAMESPACE_END
#endif // !UCONFIG_NO_COLLATION
#endif // __UTF16COLLATIONITERATOR_H__
|