1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162
|
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
* Copyright (C) 2012-2016, International Business Machines
* Corporation and others. All Rights Reserved.
*******************************************************************************
* uitercollationiterator.h
*
* created on: 2012sep23 (from utf16collationiterator.h)
* created by: Markus W. Scherer
*/
#ifndef __UITERCOLLATIONITERATOR_H__
#define __UITERCOLLATIONITERATOR_H__
#include <_foundation_unicode/utypes.h>
#if !UCONFIG_NO_COLLATION
#include <_foundation_unicode/uiter.h>
#include "cmemory.h"
#include "collation.h"
#include "collationdata.h"
#include "collationiterator.h"
#include "normalizer2impl.h"
U_NAMESPACE_BEGIN
/**
 * Collation element and character iterator that reads its text through a UCharIterator.
 * Normalized text is handled inline, with length or NUL-terminated.
 * Text that is not normalized is left to a subclass.
 */
class U_I18N_API UIterCollationIterator : public CollationIterator {
public:
    /**
     * Passes d and numeric on to the CollationIterator base class and binds ui.
     * Only a reference to ui is kept, so ui has to outlive this object.
     */
    UIterCollationIterator(const CollationData *d, UBool numeric, UCharIterator &ui)
            : CollationIterator(d, numeric),
              iter(ui) {
    }

    virtual ~UIterCollationIterator();

    // CollationIterator overrides; the definitions are not part of this header.
    void resetToOffset(int32_t newOffset) override;

    int32_t getOffset() const override;

    UChar32 nextCodePoint(UErrorCode &errorCode) override;

    UChar32 previousCodePoint(UErrorCode &errorCode) override;

protected:
    // Protected CollationIterator hooks, overridden for UCharIterator input.
    uint32_t handleNextCE32(UChar32 &c, UErrorCode &errorCode) override;

    char16_t handleGetTrailSurrogate() override;

    void forwardNumCodePoints(int32_t num, UErrorCode &errorCode) override;

    void backwardNumCodePoints(int32_t num, UErrorCode &errorCode) override;

    /** The text iterator handed to the constructor; referenced, not copied. */
    UCharIterator &iter;
};
/**
 * Incrementally checks the input text for FCD and normalizes where necessary.
 */
class U_I18N_API FCDUIterCollationIterator : public UIterCollationIterator {
public:
    /**
     * Starts out in the ITER_CHECK_FWD state with start set to startIndex,
     * and takes the NFC implementation from data->nfcImpl
     * (data is dereferenced here, so it must not be null).
     *
     * pos and limit are not used in ITER_CHECK_FWD, but they are set to 0
     * so that no member ever holds an indeterminate value.
     */
    FCDUIterCollationIterator(const CollationData *data, UBool numeric, UCharIterator &ui, int32_t startIndex)
            : UIterCollationIterator(data, numeric, ui),
              state(ITER_CHECK_FWD), start(startIndex),
              pos(0), limit(0),
              nfcImpl(data->nfcImpl) {}

    virtual ~FCDUIterCollationIterator();

    // Overrides of the UIterCollationIterator public interface.
    virtual void resetToOffset(int32_t newOffset) override;

    virtual int32_t getOffset() const override;

    virtual UChar32 nextCodePoint(UErrorCode &errorCode) override;

    virtual UChar32 previousCodePoint(UErrorCode &errorCode) override;

protected:
    // Overrides of the UIterCollationIterator protected hooks.
    virtual uint32_t handleNextCE32(UChar32 &c, UErrorCode &errorCode) override;

    virtual char16_t handleGetTrailSurrogate() override;

    virtual void forwardNumCodePoints(int32_t num, UErrorCode &errorCode) override;

    virtual void backwardNumCodePoints(int32_t num, UErrorCode &errorCode) override;

private:
    /**
     * Switches to forward checking if possible.
     */
    void switchToForward();

    /**
     * Extends the FCD text segment forward or normalizes around pos.
     * @return true if success
     */
    UBool nextSegment(UErrorCode &errorCode);

    /**
     * Switches to backward checking.
     */
    void switchToBackward();

    /**
     * Extends the FCD text segment backward or normalizes around pos.
     * @return true if success
     */
    UBool previousSegment(UErrorCode &errorCode);

    /**
     * NOTE(review): presumably normalizes s into the `normalized` member and
     * returns true on success; the definition is not visible in this header -- confirm.
     */
    UBool normalize(const UnicodeString &s, UErrorCode &errorCode);

    /**
     * Iteration state. It determines which of start, pos and limit
     * are meaningful at any given time.
     */
    enum State {
        /**
         * The input text [start..(iter index)[ passes the FCD check.
         * Moving forward checks incrementally.
         * pos & limit are not meaningful in this state.
         */
        ITER_CHECK_FWD,
        /**
         * The input text [(iter index)..limit[ passes the FCD check.
         * Moving backward checks incrementally.
         * start & pos are not meaningful in this state.
         */
        ITER_CHECK_BWD,
        /**
         * The input text [start..limit[ passes the FCD check.
         * pos tracks the current text index.
         */
        ITER_IN_FCD_SEGMENT,
        /**
         * The input text [start..limit[ failed the FCD check and was normalized.
         * pos tracks the current index in the normalized string.
         * The text iterator is at the limit index.
         */
        IN_NORM_ITER_AT_LIMIT,
        /**
         * The input text [start..limit[ failed the FCD check and was normalized.
         * pos tracks the current index in the normalized string.
         * The text iterator is at the start index.
         */
        IN_NORM_ITER_AT_START
    };

    /** Current iteration state; see State for what each value implies. */
    State state;

    // Segment boundaries and current position; which of these are
    // meaningful depends on state (see the State documentation).
    int32_t start;
    int32_t pos;
    int32_t limit;

    /** Bound to data->nfcImpl in the constructor. */
    const Normalizer2Impl &nfcImpl;
    /** NOTE(review): presumably the normalized string that pos indexes in the IN_NORM_* states -- confirm. */
    UnicodeString normalized;
};
U_NAMESPACE_END
#endif // !UCONFIG_NO_COLLATION
#endif // __UITERCOLLATIONITERATOR_H__
|