File: TextBreakIteratorICU.cpp

package info (click to toggle)
chromium-browser 57.0.2987.98-1~deb8u1
links: PTS, VCS
area: main
in suites: jessie
size: 2,637,852 kB
ctags: 2,544,394
sloc: cpp: 12,815,961; ansic: 3,676,222; python: 1,147,112; asm: 526,608; java: 523,212; xml: 286,794; perl: 92,654; sh: 86,408; objc: 73,271; makefile: 27,698; cs: 18,487; yacc: 13,031; tcl: 12,957; pascal: 4,875; ml: 4,716; lex: 3,904; sql: 3,862; ruby: 1,982; lisp: 1,508; php: 1,368; exp: 404; awk: 325; csh: 117; jsp: 39; sed: 37
file content (941 lines) | stat: -rw-r--r-- 35,158 bytes
/*
 * Copyright (C) 2006 Lars Knoll <lars@trolltech.com>
 * Copyright (C) 2007, 2011, 2012 Apple Inc. All rights reserved.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Library General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Library General Public License for more details.
 *
 * You should have received a copy of the GNU Library General Public License
 * along with this library; see the file COPYING.LIB.  If not, write to
 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
 * Boston, MA 02110-1301, USA.
 *
 */

#include "platform/text/TextBreakIterator.h"

#include "platform/text/TextBreakIteratorInternalICU.h"
#include "wtf/Assertions.h"
#include "wtf/HashMap.h"
#include "wtf/PtrUtil.h"
#include "wtf/ThreadSpecific.h"
#include "wtf/ThreadingPrimitives.h"
#include "wtf/text/WTFString.h"
#include <memory>
#include <unicode/rbbi.h>
#include <unicode/ubrk.h>

using namespace WTF;

namespace blink {

// A small cache of ICU line-break iterators keyed by locale. The shared
// instance is held in a WTF::ThreadSpecific. Iterators handed out by take()
// are tracked in |m_vendedIterators| until they come back through put(); at
// most |capacity| idle iterators are kept, the oldest being deleted first.
class LineBreakIteratorPool final {
  USING_FAST_MALLOC(LineBreakIteratorPool);
  WTF_MAKE_NONCOPYABLE(LineBreakIteratorPool);

 public:
  // Returns the pool stored in the function-local ThreadSpecific slot.
  static LineBreakIteratorPool& sharedPool() {
    static WTF::ThreadSpecific<LineBreakIteratorPool>* pool =
        new WTF::ThreadSpecific<LineBreakIteratorPool>;
    return **pool;
  }

  static std::unique_ptr<LineBreakIteratorPool> create() {
    return WTF::wrapUnique(new LineBreakIteratorPool);
  }

  // Idle iterators are owned by the pool as raw pointers, so they must be
  // released explicitly when the pool goes away. Iterators that are currently
  // vended are not touched here; their holders still reference them.
  ~LineBreakIteratorPool() {
    for (size_t i = 0; i < m_pool.size(); ++i)
      delete m_pool[i].second;
  }

  // Returns a line-break iterator for |locale|, reusing an idle one when an
  // entry with the same locale is cached and creating a new one otherwise.
  // Returns null when ICU cannot construct an iterator.
  icu::BreakIterator* take(const AtomicString& locale) {
    icu::BreakIterator* iterator = nullptr;
    for (size_t i = 0; i < m_pool.size(); ++i) {
      if (m_pool[i].first == locale) {
        iterator = m_pool[i].second;
        m_pool.remove(i);
        break;
      }
    }

    if (!iterator) {
      UErrorCode openStatus = U_ZERO_ERROR;
      bool localeIsEmpty = locale.isEmpty();
      iterator = icu::BreakIterator::createLineInstance(
          localeIsEmpty ? icu::Locale(currentTextBreakLocaleID())
                        : icu::Locale(locale.utf8().data()),
          openStatus);
      // locale comes from a web page and it can be invalid, leading ICU
      // to fail, in which case we fall back to the default locale.
      if (!localeIsEmpty && U_FAILURE(openStatus)) {
        openStatus = U_ZERO_ERROR;
        iterator = icu::BreakIterator::createLineInstance(
            icu::Locale(currentTextBreakLocaleID()), openStatus);
      }

      if (U_FAILURE(openStatus)) {
        DLOG(ERROR) << "icu::BreakIterator construction failed with status "
                    << openStatus;
        return nullptr;
      }
    }

    DCHECK(!m_vendedIterators.contains(iterator));
    m_vendedIterators.set(iterator, locale);
    return iterator;
  }

  // Returns a previously vended |iterator| to the idle list under the locale
  // it was taken with. When the idle list is full, the oldest entry is
  // deleted to make room.
  void put(icu::BreakIterator* iterator) {
    DCHECK(m_vendedIterators.contains(iterator));

    if (m_pool.size() == capacity) {
      delete (m_pool[0].second);
      m_pool.remove(0);
    }

    m_pool.push_back(Entry(m_vendedIterators.take(iterator), iterator));
  }

 private:
  LineBreakIteratorPool() {}

  // Maximum number of idle iterators kept around.
  static const size_t capacity = 4;

  typedef std::pair<AtomicString, icu::BreakIterator*> Entry;
  typedef Vector<Entry, capacity> Pool;
  // Idle iterators, oldest first.
  Pool m_pool;
  // Iterators currently handed out, mapped to the locale they were taken for.
  HashMap<icu::BreakIterator*, AtomicString> m_vendedIterators;

  friend WTF::ThreadSpecific<LineBreakIteratorPool>::
  operator LineBreakIteratorPool*();
};

// Identifies which backing store a UText chunk currently refers to:
//   NoContext      - no chunk has been selected yet (chunkContents is null).
//   PriorContext   - the chunk refers to the prior-context string (text->q).
//   PrimaryContext - the chunk refers to the primary string being iterated.
enum TextContext { NoContext, PriorContext, PrimaryContext };

// Number of UChars in the scratch buffer embedded in UTextWithBuffer.
const int textBufferCapacity = 16;

// A UText bundled with a fixed-size UChar buffer. textOpenLatin1() passes the
// buffer's size to utext_setup() as the extra space, and the Latin-1 provider
// copies widened characters into that extra space one chunk at a time.
typedef struct {
  DISALLOW_NEW();
  UText text;
  UChar buffer[textBufferCapacity];
} UTextWithBuffer;

// Pins |index| in place: negative values become 0, values above |limit|
// become |limit|. The adjusted value is also returned.
static inline int64_t textPinIndex(int64_t& index, int64_t limit) {
  if (index < 0) {
    index = 0;
    return index;
  }
  if (limit < index)
    index = limit;
  return index;
}

// Total native length of the text: the primary string length (stored in
// text->a) plus the prior-context length (stored in text->b).
static inline int64_t textNativeLength(UText* text) {
  const int64_t primaryLength = text->a;
  const int64_t priorLength = text->b;
  return primaryLength + priorLength;
}

// Rebases |pointer| after |source| has been copied into |destination|: a
// pointer into the source's extra buffer is moved to the same offset in the
// destination's extra buffer, and a pointer into the source UText structure
// itself is moved to the same offset in the destination structure. Any other
// pointer is left untouched.
static void textFixPointer(const UText* source,
                           UText* destination,
                           const void*& pointer) {
  const char* target = static_cast<const char*>(pointer);

  const char* extraBegin = static_cast<const char*>(source->pExtra);
  const char* extraEnd = extraBegin + source->extraSize;
  if (target >= extraBegin && target < extraEnd) {
    // Pointer references source extra buffer.
    pointer = static_cast<char*>(destination->pExtra) + (target - extraBegin);
    return;
  }

  const char* structBegin = reinterpret_cast<const char*>(source);
  const char* structEnd = structBegin + source->sizeOfStruct;
  if (target >= structBegin && target < structEnd) {
    // Pointer references source text structure, but not source extra buffer.
    pointer = reinterpret_cast<char*>(destination) + (target - structBegin);
  }
}

// UTextFuncs clone callback shared by the Latin-1 and UTF-16 providers.
// Only shallow clones are supported (|deep| must be false). The source
// structure and its extra buffer are copied into |destination| (set up via
// utext_setup), and any pointers that referred into the source structure or
// its extra buffer are rebased onto the copy.
// Returns null if |status| already signals failure on entry; otherwise
// returns whatever utext_setup produced, fully initialized on success.
static UText* textClone(UText* destination,
                        const UText* source,
                        UBool deep,
                        UErrorCode* status) {
  DCHECK(!deep);
  if (U_FAILURE(*status))
    return 0;
  int32_t extraSize = source->extraSize;
  destination = utext_setup(destination, extraSize, status);
  if (U_FAILURE(*status))
    return destination;
  // utext_setup owns these two fields; preserve them across the raw copy.
  void* extraNew = destination->pExtra;
  int32_t flags = destination->flags;
  int sizeToCopy = std::min(source->sizeOfStruct, destination->sizeOfStruct);
  memcpy(destination, source, sizeToCopy);
  destination->pExtra = extraNew;
  destination->flags = flags;
  // The UTF-16 provider is opened with no extra space, in which case the
  // pExtra pointers may be null; memcpy must not be called with null
  // arguments even for a zero length.
  if (extraSize > 0)
    memcpy(destination->pExtra, source->pExtra, extraSize);
  textFixPointer(source, destination, destination->context);
  textFixPointer(source, destination, destination->p);
  textFixPointer(source, destination, destination->q);
  ASSERT(!destination->r);
  const void* chunkContents =
      static_cast<const void*>(destination->chunkContents);
  textFixPointer(source, destination, chunkContents);
  destination->chunkContents = static_cast<const UChar*>(chunkContents);
  return destination;
}

// UTextFuncs extract callback. Extraction is not supported by these
// providers: the call asserts in debug builds, reports U_UNSUPPORTED_ERROR
// through |errorCode| and returns 0. All other arguments are ignored.
static int32_t textExtract(UText*,
                           int64_t,
                           int64_t,
                           UChar*,
                           int32_t,
                           UErrorCode* errorCode) {
  // In the present context, this text provider is used only with ICU functions
  // that do not perform an extract operation.
  ASSERT_NOT_REACHED();
  *errorCode = U_UNSUPPORTED_ERROR;
  return 0;
}

// UTextFuncs close callback. Clears the context pointer; the access
// callbacks in this file return FALSE once the context is null.
static void textClose(UText* text) {
  text->context = nullptr;
}

// Decides which context |nativeIndex| falls into. Indices below the prior
// context length (text->b) are in the prior context, indices above it are in
// the primary context, and the boundary index belongs to the primary context
// when moving forward and to the prior context when moving backward. With no
// prior context at all, everything is primary.
static inline TextContext textGetContext(const UText* text,
                                         int64_t nativeIndex,
                                         UBool forward) {
  const int64_t priorLength = text->b;
  if (priorLength == 0)
    return PrimaryContext;
  if (nativeIndex < priorLength)
    return PriorContext;
  if (nativeIndex > priorLength)
    return PrimaryContext;
  // Exactly on the boundary between the two contexts.
  return forward ? PrimaryContext : PriorContext;
}

// Reports which context the current chunk of a Latin-1 UText refers to: none
// when no chunk is set, primary when the chunk is the extra buffer, and prior
// otherwise.
static inline TextContext textLatin1GetCurrentContext(const UText* text) {
  const UChar* chunk = text->chunkContents;
  if (!chunk)
    return NoContext;
  if (chunk == text->pExtra)
    return PrimaryContext;
  return PriorContext;
}

// Repositions the chunk of a Latin-1 UText within the primary string. The
// chunk is at most as many UChars as fit in the extra buffer; it starts at
// |nativeIndex| when moving forward and ends at |nativeIndex| when moving
// backward, clamped to the primary string's native range. The matching
// Latin-1 characters are then widened into the extra buffer.
static void textLatin1MoveInPrimaryContext(UText* text,
                                           int64_t nativeIndex,
                                           int64_t nativeLength,
                                           UBool forward) {
  ASSERT(text->chunkContents == text->pExtra);
  const int64_t priorLength = text->b;
  const int64_t bufferCapacity = text->extraSize / sizeof(UChar);
  if (forward) {
    ASSERT(nativeIndex >= text->b && nativeIndex < nativeLength);
    text->chunkNativeStart = nativeIndex;
    text->chunkNativeLimit =
        std::min(nativeIndex + bufferCapacity, nativeLength);
  } else {
    ASSERT(nativeIndex > text->b && nativeIndex <= nativeLength);
    text->chunkNativeLimit = nativeIndex;
    text->chunkNativeStart =
        std::max(nativeIndex - bufferCapacity, priorLength);
  }

  const int64_t span = text->chunkNativeLimit - text->chunkNativeStart;
  // Ensure chunk length is well defined if computed length exceeds int32_t
  // range.
  ASSERT(span <= std::numeric_limits<int32_t>::max());
  int32_t chunkLength = 0;
  if (span <= std::numeric_limits<int32_t>::max())
    chunkLength = static_cast<int32_t>(span);
  text->chunkLength = chunkLength;
  text->nativeIndexingLimit = chunkLength;
  text->chunkOffset = forward ? 0 : chunkLength;

  // Widen the Latin-1 characters covered by the chunk into the buffer.
  const LChar* sourceChars =
      static_cast<const LChar*>(text->p) + (text->chunkNativeStart - priorLength);
  StringImpl::copyChars(const_cast<UChar*>(text->chunkContents), sourceChars,
                        static_cast<unsigned>(chunkLength));
}

// Points the chunk of a Latin-1 UText at the extra buffer (the primary
// context) and then positions it around |nativeIndex|.
static void textLatin1SwitchToPrimaryContext(UText* text,
                                             int64_t nativeIndex,
                                             int64_t nativeLength,
                                             UBool forward) {
  ASSERT(!text->chunkContents || text->chunkContents == text->q);
  const UChar* scratchBuffer = static_cast<const UChar*>(text->pExtra);
  text->chunkContents = scratchBuffer;
  textLatin1MoveInPrimaryContext(text, nativeIndex, nativeLength, forward);
}

// Positions the chunk of a Latin-1 UText inside the prior context. The chunk
// always spans the whole prior-context string (native range [0, text->b)),
// so only the chunk offset depends on |nativeIndex|.
static void textLatin1MoveInPriorContext(UText* text,
                                         int64_t nativeIndex,
                                         int64_t nativeLength,
                                         UBool forward) {
  ASSERT(text->chunkContents == text->q);
  ASSERT(forward ? nativeIndex < text->b : nativeIndex <= text->b);
  DCHECK(forward ? nativeIndex < nativeLength : nativeIndex <= nativeLength);
  text->chunkNativeStart = 0;
  text->chunkNativeLimit = text->b;
  text->chunkLength = text->b;
  text->nativeIndexingLimit = text->chunkLength;
  int64_t offset = nativeIndex - text->chunkNativeStart;
  // Ensure chunk offset is well defined if computed offset exceeds int32_t
  // range or chunk length.
  ASSERT(offset <= std::numeric_limits<int32_t>::max());
  text->chunkOffset = std::min(offset <= std::numeric_limits<int32_t>::max()
                                   ? static_cast<int32_t>(offset)
                                   : 0,
                               text->chunkLength);
}

// Points the chunk of a Latin-1 UText at the prior-context string (text->q)
// and then positions it around |nativeIndex|.
static void textLatin1SwitchToPriorContext(UText* text,
                                           int64_t nativeIndex,
                                           int64_t nativeLength,
                                           UBool forward) {
  ASSERT(!text->chunkContents || text->chunkContents == text->pExtra);
  const UChar* priorChars = static_cast<const UChar*>(text->q);
  text->chunkContents = priorChars;
  textLatin1MoveInPriorContext(text, nativeIndex, nativeLength, forward);
}

// Fast path for the access callbacks. Returns true when no chunk change is
// needed, in which case the chunk offset has been updated and |isAccessible|
// tells the caller what to return:
//  * |nativeIndex| lies inside the current chunk (half-open on the side the
//    iteration is heading towards): accessible.
//  * |nativeIndex| is at or past the end while the chunk already ends at the
//    text end (forward), or at or before 0 while the chunk already starts at 0
//    (backward): not accessible.
// Returns false when the caller has to load a different chunk.
static inline bool textInChunkOrOutOfRange(UText* text,
                                           int64_t nativeIndex,
                                           int64_t nativeLength,
                                           UBool forward,
                                           UBool& isAccessible) {
  const int64_t chunkStart = text->chunkNativeStart;
  const int64_t chunkLimit = text->chunkNativeLimit;

  const bool insideChunk =
      forward ? (nativeIndex >= chunkStart && nativeIndex < chunkLimit)
              : (nativeIndex > chunkStart && nativeIndex <= chunkLimit);
  if (insideChunk) {
    const int64_t distance = nativeIndex - chunkStart;
    // Ensure chunk offset is well formed if computed offset exceeds int32_t
    // range.
    ASSERT(distance <= std::numeric_limits<int32_t>::max());
    if (distance <= std::numeric_limits<int32_t>::max())
      text->chunkOffset = static_cast<int32_t>(distance);
    else
      text->chunkOffset = 0;
    isAccessible = TRUE;
    return true;
  }

  if (forward) {
    if (nativeIndex >= nativeLength && chunkLimit == nativeLength) {
      text->chunkOffset = text->chunkLength;
      isAccessible = FALSE;
      return true;
    }
    return false;
  }

  if (nativeIndex <= 0 && !chunkStart) {
    text->chunkOffset = 0;
    isAccessible = FALSE;
    return true;
  }
  return false;
}

// UTextFuncs access callback for the Latin-1 provider. Makes the chunk cover
// |nativeIndex| for iteration in the given direction. Returns FALSE when the
// text has been closed (null context) or the index is out of range; TRUE once
// a chunk containing the (pinned) index has been selected.
static UBool textLatin1Access(UText* text, int64_t nativeIndex, UBool forward) {
  if (!text->context)
    return FALSE;

  const int64_t totalLength = textNativeLength(text);
  UBool accessible;
  if (textInChunkOrOutOfRange(text, nativeIndex, totalLength, forward,
                              accessible))
    return accessible;

  // Pins |nativeIndex| in place to the last valid index.
  textPinIndex(nativeIndex, totalLength - 1);
  const TextContext activeContext = textLatin1GetCurrentContext(text);
  const TextContext targetContext = textGetContext(text, nativeIndex, forward);
  ASSERT(targetContext != NoContext);

  const bool stayingInContext = targetContext == activeContext;
  if (stayingInContext && activeContext == PrimaryContext) {
    textLatin1MoveInPrimaryContext(text, nativeIndex, totalLength, forward);
  } else if (stayingInContext) {
    textLatin1MoveInPriorContext(text, nativeIndex, totalLength, forward);
  } else if (targetContext == PrimaryContext) {
    textLatin1SwitchToPrimaryContext(text, nativeIndex, totalLength, forward);
  } else {
    ASSERT(targetContext == PriorContext);
    textLatin1SwitchToPriorContext(text, nativeIndex, totalLength, forward);
  }
  return TRUE;
}

// Function table for the Latin-1 text provider. Entries are positional and
// follow ICU's UTextFuncs declaration order; only the clone, nativeLength,
// access, extract and close callbacks are supplied, every other slot is 0.
static const struct UTextFuncs textLatin1Funcs = {
    sizeof(UTextFuncs), 0,           0, 0, textClone, textNativeLength,
    textLatin1Access,   textExtract, 0, 0, 0,         0,
    textClose,          0,           0, 0,
};

// Fills in the provider-specific fields of an already set-up UText:
//   pFuncs / providerProperties - the callback table and its property bits;
//   context, p and a            - the primary string and its length;
//   q and b                     - the prior-context string and its length.
static void textInit(UText* text,
                     const UTextFuncs* funcs,
                     const void* string,
                     unsigned length,
                     const UChar* priorContext,
                     int priorContextLength) {
  // Primary string.
  text->context = string;
  text->p = string;
  text->a = length;

  // Prior context.
  text->q = priorContext;
  text->b = priorContextLength;

  // Provider callbacks and properties.
  text->providerProperties = 1 << UTEXT_PROVIDER_STABLE_CHUNKS;
  text->pFuncs = funcs;
}

// Opens a UText over the Latin-1 |string| (with optional UTF-16 prior
// context) inside |utWithBuffer|, using its embedded buffer as extra space.
// Returns null when |status| already signals failure, when |string| is null
// or |length| exceeds the int32_t range (status becomes
// U_ILLEGAL_ARGUMENT_ERROR), or when utext_setup fails.
static UText* textOpenLatin1(UTextWithBuffer* utWithBuffer,
                             const LChar* string,
                             unsigned length,
                             const UChar* priorContext,
                             int priorContextLength,
                             UErrorCode* status) {
  if (U_FAILURE(*status))
    return 0;

  const unsigned maxLength =
      static_cast<unsigned>(std::numeric_limits<int32_t>::max());
  const bool validInput = string && length <= maxLength;
  if (!validInput) {
    *status = U_ILLEGAL_ARGUMENT_ERROR;
    return 0;
  }

  UText* opened =
      utext_setup(&utWithBuffer->text, sizeof(utWithBuffer->buffer), status);
  if (!U_FAILURE(*status)) {
    textInit(opened, &textLatin1Funcs, string, length, priorContext,
             priorContextLength);
    return opened;
  }
  ASSERT(!opened);
  return 0;
}

// Reports which context the current chunk of a UTF-16 UText refers to: none
// when no chunk is set, primary when the chunk is the primary string
// (text->p), and prior otherwise.
static inline TextContext textUTF16GetCurrentContext(const UText* text) {
  const UChar* chunk = text->chunkContents;
  if (!chunk)
    return NoContext;
  if (chunk == text->p)
    return PrimaryContext;
  return PriorContext;
}

// Positions the chunk of a UTF-16 UText inside the primary string. The chunk
// always spans the whole primary string (native range
// [text->b, nativeLength)), so only the chunk offset depends on
// |nativeIndex|.
static void textUTF16MoveInPrimaryContext(UText* text,
                                          int64_t nativeIndex,
                                          int64_t nativeLength,
                                          UBool forward) {
  ASSERT(text->chunkContents == text->p);
  DCHECK(forward ? nativeIndex >= text->b : nativeIndex > text->b);
  DCHECK(forward ? nativeIndex < nativeLength : nativeIndex <= nativeLength);

  const int64_t int32Max = std::numeric_limits<int32_t>::max();
  const int64_t chunkStart = text->b;
  text->chunkNativeStart = chunkStart;
  text->chunkNativeLimit = nativeLength;

  const int64_t span = nativeLength - chunkStart;
  // Ensure chunk length is well defined if computed length exceeds int32_t
  // range.
  ASSERT(span <= int32Max);
  text->chunkLength = span <= int32Max ? static_cast<int32_t>(span) : 0;
  text->nativeIndexingLimit = text->chunkLength;

  const int64_t distance = nativeIndex - chunkStart;
  // Ensure chunk offset is well defined if computed offset exceeds int32_t
  // range or chunk length.
  ASSERT(distance <= int32Max);
  int32_t chunkOffset =
      distance <= int32Max ? static_cast<int32_t>(distance) : 0;
  if (text->chunkLength < chunkOffset)
    chunkOffset = text->chunkLength;
  text->chunkOffset = chunkOffset;
}

// Points the chunk of a UTF-16 UText at the primary string (text->p) and
// then positions it around |nativeIndex|.
static void textUTF16SwitchToPrimaryContext(UText* text,
                                            int64_t nativeIndex,
                                            int64_t nativeLength,
                                            UBool forward) {
  ASSERT(!text->chunkContents || text->chunkContents == text->q);
  const UChar* primaryChars = static_cast<const UChar*>(text->p);
  text->chunkContents = primaryChars;
  textUTF16MoveInPrimaryContext(text, nativeIndex, nativeLength, forward);
}

// Positions the chunk of a UTF-16 UText inside the prior context. The chunk
// always spans the whole prior-context string (native range [0, text->b)),
// so only the chunk offset depends on |nativeIndex|.
static void textUTF16MoveInPriorContext(UText* text,
                                        int64_t nativeIndex,
                                        int64_t nativeLength,
                                        UBool forward) {
  ASSERT(text->chunkContents == text->q);
  ASSERT(forward ? nativeIndex < text->b : nativeIndex <= text->b);
  DCHECK(forward ? nativeIndex < nativeLength : nativeIndex <= nativeLength);
  text->chunkNativeStart = 0;
  text->chunkNativeLimit = text->b;
  text->chunkLength = text->b;
  text->nativeIndexingLimit = text->chunkLength;
  int64_t offset = nativeIndex - text->chunkNativeStart;
  // Ensure chunk offset is well defined if computed offset exceeds int32_t
  // range or chunk length.
  ASSERT(offset <= std::numeric_limits<int32_t>::max());
  text->chunkOffset = std::min(offset <= std::numeric_limits<int32_t>::max()
                                   ? static_cast<int32_t>(offset)
                                   : 0,
                               text->chunkLength);
}

// Points the chunk of a UTF-16 UText at the prior-context string (text->q)
// and then positions it around |nativeIndex|.
static void textUTF16SwitchToPriorContext(UText* text,
                                          int64_t nativeIndex,
                                          int64_t nativeLength,
                                          UBool forward) {
  ASSERT(!text->chunkContents || text->chunkContents == text->p);
  const UChar* priorChars = static_cast<const UChar*>(text->q);
  text->chunkContents = priorChars;
  textUTF16MoveInPriorContext(text, nativeIndex, nativeLength, forward);
}

// UTextFuncs access callback for the UTF-16 provider. Makes the chunk cover
// |nativeIndex| for iteration in the given direction. Returns FALSE when the
// text has been closed (null context) or the index is out of range; TRUE once
// a chunk containing the (pinned) index has been selected.
static UBool textUTF16Access(UText* text, int64_t nativeIndex, UBool forward) {
  if (!text->context)
    return FALSE;

  const int64_t totalLength = textNativeLength(text);
  UBool accessible;
  if (textInChunkOrOutOfRange(text, nativeIndex, totalLength, forward,
                              accessible))
    return accessible;

  // Pins |nativeIndex| in place to the last valid index.
  textPinIndex(nativeIndex, totalLength - 1);
  const TextContext activeContext = textUTF16GetCurrentContext(text);
  const TextContext targetContext = textGetContext(text, nativeIndex, forward);
  ASSERT(targetContext != NoContext);

  const bool stayingInContext = targetContext == activeContext;
  if (stayingInContext && activeContext == PrimaryContext) {
    textUTF16MoveInPrimaryContext(text, nativeIndex, totalLength, forward);
  } else if (stayingInContext) {
    textUTF16MoveInPriorContext(text, nativeIndex, totalLength, forward);
  } else if (targetContext == PrimaryContext) {
    textUTF16SwitchToPrimaryContext(text, nativeIndex, totalLength, forward);
  } else {
    ASSERT(targetContext == PriorContext);
    textUTF16SwitchToPriorContext(text, nativeIndex, totalLength, forward);
  }
  return TRUE;
}

// Function table for the UTF-16 text provider. Entries are positional and
// follow ICU's UTextFuncs declaration order; only the clone, nativeLength,
// access, extract and close callbacks are supplied, every other slot is 0.
static const struct UTextFuncs textUTF16Funcs = {
    sizeof(UTextFuncs), 0,           0, 0, textClone, textNativeLength,
    textUTF16Access,    textExtract, 0, 0, 0,         0,
    textClose,          0,           0, 0,
};

// Opens |text| as a UText over the UTF-16 |string| (with optional prior
// context). No extra space is requested; chunks point directly at the caller's
// strings. Returns null when |status| already signals failure, when |string|
// is null or |length| exceeds the int32_t range (status becomes
// U_ILLEGAL_ARGUMENT_ERROR), or when utext_setup fails.
static UText* textOpenUTF16(UText* text,
                            const UChar* string,
                            unsigned length,
                            const UChar* priorContext,
                            int priorContextLength,
                            UErrorCode* status) {
  if (U_FAILURE(*status))
    return 0;

  const unsigned maxLength =
      static_cast<unsigned>(std::numeric_limits<int32_t>::max());
  const bool validInput = string && length <= maxLength;
  if (!validInput) {
    *status = U_ILLEGAL_ARGUMENT_ERROR;
    return 0;
  }

  text = utext_setup(text, 0, status);
  if (!U_FAILURE(*status)) {
    textInit(text, &textUTF16Funcs, string, length, priorContext,
             priorContextLength);
    return text;
  }
  ASSERT(!text);
  return 0;
}

// Pristine UText value that is copied into stack-allocated UTextWithBuffer
// objects before they are opened.
static UText emptyText = UTEXT_INITIALIZER;

// Returns the function-local word-break iterator (created on first use for
// the current text-break locale) positioned over the Latin-1 |string|.
// Returns null when the iterator cannot be created, when the text cannot be
// opened, or when the iterator rejects the text.
static TextBreakIterator* wordBreakIterator(const LChar* string, int length) {
  UErrorCode errorCode = U_ZERO_ERROR;
  static TextBreakIterator* breakIter = 0;
  if (!breakIter) {
    breakIter = icu::BreakIterator::createWordInstance(
        icu::Locale(currentTextBreakLocaleID()), errorCode);
    DCHECK(U_SUCCESS(errorCode))
        << "ICU could not open a break iterator: " << u_errorName(errorCode)
        << " (" << errorCode << ")";
    if (!breakIter)
      return 0;
  }

  UTextWithBuffer textLocal;
  textLocal.text = emptyText;
  textLocal.text.extraSize = sizeof(textLocal.buffer);
  textLocal.text.pExtra = textLocal.buffer;

  UErrorCode openStatus = U_ZERO_ERROR;
  UText* text = textOpenLatin1(&textLocal, string, length, 0, 0, &openStatus);
  if (U_FAILURE(openStatus)) {
    DLOG(ERROR) << "textOpenLatin1 failed with status " << openStatus;
    return 0;
  }

  UErrorCode setTextStatus = U_ZERO_ERROR;
  breakIter->setText(text, setTextStatus);
  // The local UText is no longer needed whether or not setText succeeded.
  utext_close(text);

  if (U_FAILURE(setTextStatus)) {
    // Do not hand out an iterator that was not attached to |string|; it may
    // still refer to the text of an earlier call.
    DLOG(ERROR) << "BreakIterator::setText failed with status "
                << setTextStatus;
    return 0;
  }

  return breakIter;
}

// Attaches the UTF-16 buffer |string| of |length| code units to |iter| via a
// temporary stack UText. Failures from ICU are swallowed: if the UText cannot
// be opened the iterator is left untouched.
static void setText16(TextBreakIterator* iter,
                      const UChar* string,
                      int length) {
  UErrorCode errorCode = U_ZERO_ERROR;
  UText uText = UTEXT_INITIALIZER;
  utext_openUChars(&uText, string, length, &errorCode);
  if (U_FAILURE(errorCode))
    return;
  iter->setText(&uText, errorCode);
  // Close the temporary UText once it has been handed to the iterator, as
  // the other setText() call sites in this file do for their UTexts.
  utext_close(&uText);
}

// Returns the function-local word-break iterator (created on first use for
// the current text-break locale) with the UTF-16 |string| attached. Returns
// null if the iterator cannot be created.
TextBreakIterator* wordBreakIterator(const UChar* string, int length) {
  static TextBreakIterator* cachedIterator = 0;
  if (!cachedIterator) {
    UErrorCode createStatus = U_ZERO_ERROR;
    cachedIterator = icu::BreakIterator::createWordInstance(
        icu::Locale(currentTextBreakLocaleID()), createStatus);
    DCHECK(U_SUCCESS(createStatus))
        << "ICU could not open a break iterator: "
        << u_errorName(createStatus) << " (" << createStatus << ")";
    if (!cachedIterator)
      return 0;
  }

  setText16(cachedIterator, string, length);
  return cachedIterator;
}

// Returns a word-break iterator over |length| characters of |string|
// beginning at |start|, dispatching on the string's character width.
// Returns null for an empty string. |start| and |length| are not validated
// here.
TextBreakIterator* wordBreakIterator(const String& string,
                                     int start,
                                     int length) {
  if (string.isEmpty())
    return 0;
  return string.is8Bit()
             ? wordBreakIterator(string.characters8() + start, length)
             : wordBreakIterator(string.characters16() + start, length);
}

// Takes a line-break iterator for |locale| from the shared pool and attaches
// the Latin-1 |string| (plus optional UTF-16 prior context) to it. On success
// the caller must hand the iterator back with releaseLineBreakIterator().
// Returns null on failure; in that case the iterator has already been
// returned to the pool.
TextBreakIterator* acquireLineBreakIterator(const LChar* string,
                                            int length,
                                            const AtomicString& locale,
                                            const UChar* priorContext,
                                            unsigned priorContextLength) {
  TextBreakIterator* iterator =
      LineBreakIteratorPool::sharedPool().take(locale);
  if (!iterator)
    return 0;

  UTextWithBuffer textLocal;
  textLocal.text = emptyText;
  textLocal.text.extraSize = sizeof(textLocal.buffer);
  textLocal.text.pExtra = textLocal.buffer;

  UErrorCode openStatus = U_ZERO_ERROR;
  UText* text = textOpenLatin1(&textLocal, string, length, priorContext,
                               priorContextLength, &openStatus);
  if (U_FAILURE(openStatus)) {
    DLOG(ERROR) << "textOpenLatin1 failed with status " << openStatus;
    // The caller gets no iterator to release, so give it back here;
    // otherwise it would stay marked as vended forever.
    LineBreakIteratorPool::sharedPool().put(iterator);
    return 0;
  }

  UErrorCode setTextStatus = U_ZERO_ERROR;
  iterator->setText(text, setTextStatus);
  // The local UText is no longer needed whether or not setText succeeded.
  utext_close(text);

  if (U_FAILURE(setTextStatus)) {
    DLOG(ERROR) << "ubrk_setUText failed with status " << setTextStatus;
    LineBreakIteratorPool::sharedPool().put(iterator);
    return 0;
  }

  return iterator;
}

// Takes a line-break iterator for |locale| from the shared pool and attaches
// the UTF-16 |string| (plus optional prior context) to it. On success the
// caller must hand the iterator back with releaseLineBreakIterator().
// Returns null on failure; in that case the iterator has already been
// returned to the pool.
TextBreakIterator* acquireLineBreakIterator(const UChar* string,
                                            int length,
                                            const AtomicString& locale,
                                            const UChar* priorContext,
                                            unsigned priorContextLength) {
  TextBreakIterator* iterator =
      LineBreakIteratorPool::sharedPool().take(locale);
  if (!iterator)
    return 0;

  UText textLocal = UTEXT_INITIALIZER;

  UErrorCode openStatus = U_ZERO_ERROR;
  UText* text = textOpenUTF16(&textLocal, string, length, priorContext,
                              priorContextLength, &openStatus);
  if (U_FAILURE(openStatus)) {
    DLOG(ERROR) << "textOpenUTF16 failed with status " << openStatus;
    // The caller gets no iterator to release, so give it back here;
    // otherwise it would stay marked as vended forever.
    LineBreakIteratorPool::sharedPool().put(iterator);
    return 0;
  }

  UErrorCode setTextStatus = U_ZERO_ERROR;
  iterator->setText(text, setTextStatus);
  // The local UText is no longer needed whether or not setText succeeded.
  utext_close(text);

  if (U_FAILURE(setTextStatus)) {
    DLOG(ERROR) << "ubrk_setUText failed with status " << setTextStatus;
    LineBreakIteratorPool::sharedPool().put(iterator);
    return 0;
  }

  return iterator;
}

// Hands an iterator obtained from acquireLineBreakIterator() back to the
// shared pool. |iterator| must not be null.
void releaseLineBreakIterator(TextBreakIterator* iterator) {
  DCHECK(iterator);
  LineBreakIteratorPool& pool = LineBreakIteratorPool::sharedPool();
  pool.put(iterator);
}

// Single cached character-break iterator. NonSharedCharacterBreakIterator
// instances claim it on construction when it is available and try to park
// their iterator here again on destruction.
static TextBreakIterator* nonSharedCharacterBreakIterator;

// Under a function-local mutex, replaces the cached iterator with |newValue|
// if it currently equals |expected|. Returns whether the swap happened.
static inline bool compareAndSwapNonSharedCharacterBreakIterator(
    TextBreakIterator* expected,
    TextBreakIterator* newValue) {
  DEFINE_STATIC_LOCAL(Mutex, nonSharedCharacterBreakIteratorMutex, ());
  MutexLocker locker(nonSharedCharacterBreakIteratorMutex);
  const bool matchesExpected = nonSharedCharacterBreakIterator == expected;
  if (matchesExpected)
    nonSharedCharacterBreakIterator = newValue;
  return matchesExpected;
}

// Builds a character-break iterator over |string|. An empty string leaves the
// object in its default 8-bit state with zero length. 8-bit strings are
// walked directly through the stored character pointer, offset and length;
// only 16-bit strings get an ICU iterator.
NonSharedCharacterBreakIterator::NonSharedCharacterBreakIterator(
    const String& string)
    : m_is8Bit(true), m_charaters8(0), m_offset(0), m_length(0), m_iterator(0) {
  if (string.isEmpty())
    return;

  m_is8Bit = string.is8Bit();
  if (!m_is8Bit) {
    createIteratorForBuffer(string.characters16(), string.length());
    return;
  }

  m_charaters8 = string.characters8();
  m_offset = 0;
  m_length = string.length();
}

// Builds a character-break iterator over the UTF-16 |buffer| of |length|
// code units. This form always goes through the ICU iterator (m_is8Bit is
// false), so the 8-bit bookkeeping members stay zeroed.
NonSharedCharacterBreakIterator::NonSharedCharacterBreakIterator(
    const UChar* buffer,
    unsigned length)
    : m_is8Bit(false),
      m_charaters8(0),
      m_offset(0),
      m_length(0),
      m_iterator(0) {
  createIteratorForBuffer(buffer, length);
}

void NonSharedCharacterBreakIterator::createIteratorForBuffer(
    const UChar* buffer,
    unsigned length) {
  m_iterator = nonSharedCharacterBreakIterator;
  bool createdIterator =
      m_iterator &&
      compareAndSwapNonSharedCharacterBreakIterator(m_iterator, 0);
  if (!createdIterator) {
    UErrorCode errorCode = U_ZERO_ERROR;
    m_iterator = icu::BreakIterator::createCharacterInstance(
        icu::Locale(currentTextBreakLocaleID()), errorCode);
    DCHECK(U_SUCCESS(errorCode))
        << "ICU could not open a break iterator: " << u_errorName(errorCode)
        << " (" << errorCode << ")";
  }

  setText16(m_iterator, buffer, length);
}

// In 16-bit mode, tries to park the ICU iterator in the cache slot for reuse
// and deletes it when the slot is already occupied. 8-bit mode holds no
// iterator, so there is nothing to do.
NonSharedCharacterBreakIterator::~NonSharedCharacterBreakIterator() {
  if (!m_is8Bit &&
      !compareAndSwapNonSharedCharacterBreakIterator(0, m_iterator)) {
    delete m_iterator;
  }
}

// Advances to the next boundary and returns its offset. In 8-bit mode the
// position moves by one cluster (as reported by clusterLengthStartingAt) and
// TextBreakDone is returned once the end has been reached; in 16-bit mode the
// call is forwarded to the ICU iterator.
int NonSharedCharacterBreakIterator::next() {
  if (m_is8Bit) {
    if (m_offset >= m_length)
      return TextBreakDone;
    m_offset += clusterLengthStartingAt(m_offset);
    return m_offset;
  }
  return m_iterator->next();
}

// Returns the current position: the stored offset in 8-bit mode, the ICU
// iterator's position otherwise.
int NonSharedCharacterBreakIterator::current() {
  if (m_is8Bit)
    return m_offset;
  return m_iterator->current();
}

// Tells whether |offset| is a boundary. In 8-bit mode every offset is a
// boundary unless isLFAfterCR() reports true for it; in 16-bit mode the ICU
// iterator decides.
bool NonSharedCharacterBreakIterator::isBreak(int offset) const {
  if (m_is8Bit)
    return !isLFAfterCR(offset);
  return m_iterator->isBoundary(offset);
}

// Returns the boundary before |offset|. In 8-bit mode this is TextBreakDone
// for offsets at or before the start, two positions back when isLFAfterCR()
// reports true for |offset|, and one position back otherwise. In 16-bit mode
// the call is forwarded to the ICU iterator.
int NonSharedCharacterBreakIterator::preceding(int offset) const {
  if (m_is8Bit) {
    if (offset <= 0)
      return TextBreakDone;
    const int step = isLFAfterCR(offset) ? 2 : 1;
    return offset - step;
  }
  return m_iterator->preceding(offset);
}

// Returns the boundary after |offset|. In 8-bit mode this is TextBreakDone
// when |offset| is at or past the end, and otherwise |offset| advanced by the
// cluster length at that position. In 16-bit mode the call is forwarded to
// the ICU iterator.
int NonSharedCharacterBreakIterator::following(int offset) const {
  if (m_is8Bit) {
    const unsigned position = static_cast<unsigned>(offset);
    if (position >= m_length)
      return TextBreakDone;
    return offset + clusterLengthStartingAt(offset);
  }
  return m_iterator->following(offset);
}

// Returns the function-local sentence-break iterator (created on first use
// for the current text-break locale) with the UTF-16 |string| attached.
// Returns null if the iterator cannot be created.
TextBreakIterator* sentenceBreakIterator(const UChar* string, int length) {
  static TextBreakIterator* cachedIterator = 0;
  if (!cachedIterator) {
    UErrorCode createStatus = U_ZERO_ERROR;
    cachedIterator = icu::BreakIterator::createSentenceInstance(
        icu::Locale(currentTextBreakLocaleID()), createStatus);
    DCHECK(U_SUCCESS(createStatus))
        << "ICU could not open a break iterator: "
        << u_errorName(createStatus) << " (" << createStatus << ")";
    if (!cachedIterator)
      return 0;
  }

  setText16(cachedIterator, string, length);
  return cachedIterator;
}

// Returns true when the rule status of |iterator| is anything other than
// UBRK_WORD_NONE. |iterator| is statically cast to a RuleBasedBreakIterator,
// so it must actually be one.
bool isWordTextBreak(TextBreakIterator* iterator) {
  icu::RuleBasedBreakIterator* ruleBased =
      static_cast<icu::RuleBasedBreakIterator*>(iterator);
  return ruleBased->getRuleStatus() != UBRK_WORD_NONE;
}

// Returns a rule-based break iterator compiled from |breakRules| with the
// UTF-16 |string| attached. The iterator is compiled once and cached in a
// function-local static, so |breakRules| only matters on the first successful
// call. Returns null when |string| is null or the rules cannot be compiled; a
// failed compilation is not cached, so a later call retries.
static TextBreakIterator* setUpIteratorWithRules(const char* breakRules,
                                                 const UChar* string,
                                                 int length) {
  if (!string)
    return 0;

  static TextBreakIterator* iterator = 0;
  if (!iterator) {
    UParseError parseStatus;
    UErrorCode openStatus = U_ZERO_ERROR;
    Vector<UChar> rules;
    String(breakRules).appendTo(rules);

    icu::RuleBasedBreakIterator* newIterator = new icu::RuleBasedBreakIterator(
        icu::UnicodeString(rules.data(), rules.size()), parseStatus,
        openStatus);
    DCHECK(U_SUCCESS(openStatus))
        << "ICU could not open a break iterator: " << u_errorName(openStatus)
        << " (" << openStatus << ")";
    // The constructor reports rule-compilation errors through |openStatus|
    // rather than through the returned pointer, so both must be checked
    // before the object is cached.
    if (!newIterator || U_FAILURE(openStatus)) {
      delete newIterator;
      return 0;
    }
    iterator = newIterator;
  }

  setText16(iterator, string, length);
  return iterator;
}

// Returns a break iterator for cursor movement over the UTF-16 |string|,
// built from the custom rule set below via setUpIteratorWithRules(). Returns
// whatever that helper returns, including null for a null |string|.
TextBreakIterator* cursorMovementIterator(const UChar* string, int length) {
  // This rule set is based on character-break iterator rules of ICU 4.0
  // <http://source.icu-project.org/repos/icu/icu/tags/release-4-0/source/data/brkitr/char.txt>.
  // The major differences from the original ones are listed below:
  // * Replaced '[\p{Grapheme_Cluster_Break = SpacingMark}]' with
  //   '[\p{General_Category = Spacing Mark} - $Extend]' for ICU 3.8 or earlier;
  // * Removed rules that prevent a cursor from moving after prepend characters
  //   (Bug 24342);
  // * Added rules that prevent a cursor from moving after virama signs of Indic
  //   languages except Tamil (Bug 15790), and;
  // * Added rules that prevent a cursor from moving before Japanese half-width
  //   katakana voiced marks.
  // * Added rules for regional indicator symbols.
  static const char* const kRules =
      "$CR      = [\\p{Grapheme_Cluster_Break = CR}];"
      "$LF      = [\\p{Grapheme_Cluster_Break = LF}];"
      "$Control = [\\p{Grapheme_Cluster_Break = Control}];"
      "$VoiceMarks = [\\uFF9E\\uFF9F];"  // Japanese half-width katakana voiced
                                         // marks
      "$Extend  = [\\p{Grapheme_Cluster_Break = Extend} $VoiceMarks - [\\u0E30 "
      "\\u0E32 \\u0E45 \\u0EB0 \\u0EB2]];"
      "$SpacingMark = [[\\p{General_Category = Spacing Mark}] - $Extend];"
      "$L       = [\\p{Grapheme_Cluster_Break = L}];"
      "$V       = [\\p{Grapheme_Cluster_Break = V}];"
      "$T       = [\\p{Grapheme_Cluster_Break = T}];"
      "$LV      = [\\p{Grapheme_Cluster_Break = LV}];"
      "$LVT     = [\\p{Grapheme_Cluster_Break = LVT}];"
      "$Hin0    = [\\u0905-\\u0939];"          // Devanagari Letter A,...,Ha
      "$HinV    = \\u094D;"                    // Devanagari Sign Virama
      "$Hin1    = [\\u0915-\\u0939];"          // Devanagari Letter Ka,...,Ha
      "$Ben0    = [\\u0985-\\u09B9];"          // Bengali Letter A,...,Ha
      "$BenV    = \\u09CD;"                    // Bengali Sign Virama
      "$Ben1    = [\\u0995-\\u09B9];"          // Bengali Letter Ka,...,Ha
      "$Pan0    = [\\u0A05-\\u0A39];"          // Gurmukhi Letter A,...,Ha
      "$PanV    = \\u0A4D;"                    // Gurmukhi Sign Virama
      "$Pan1    = [\\u0A15-\\u0A39];"          // Gurmukhi Letter Ka,...,Ha
      "$Guj0    = [\\u0A85-\\u0AB9];"          // Gujarati Letter A,...,Ha
      "$GujV    = \\u0ACD;"                    // Gujarati Sign Virama
      "$Guj1    = [\\u0A95-\\u0AB9];"          // Gujarati Letter Ka,...,Ha
      "$Ori0    = [\\u0B05-\\u0B39];"          // Oriya Letter A,...,Ha
      "$OriV    = \\u0B4D;"                    // Oriya Sign Virama
      "$Ori1    = [\\u0B15-\\u0B39];"          // Oriya Letter Ka,...,Ha
      "$Tel0    = [\\u0C05-\\u0C39];"          // Telugu Letter A,...,Ha
      "$TelV    = \\u0C4D;"                    // Telugu Sign Virama
      "$Tel1    = [\\u0C14-\\u0C39];"          // Telugu Letter Ka,...,Ha
      // NOTE(review): every other script's "Ka,...,Ha" range starts at xx15,
      // while this one starts at U+0C14 -- confirm whether that is intended.
      "$Kan0    = [\\u0C85-\\u0CB9];"          // Kannada Letter A,...,Ha
      "$KanV    = \\u0CCD;"                    // Kannada Sign Virama
      "$Kan1    = [\\u0C95-\\u0CB9];"          // Kannada Letter Ka,...,Ha
      "$Mal0    = [\\u0D05-\\u0D39];"          // Malayalam Letter A,...,Ha
      "$MalV    = \\u0D4D;"                    // Malayalam Sign Virama
      "$Mal1    = [\\u0D15-\\u0D39];"          // Malayalam Letter Ka,...,Ha
      "$RI      = [\\U0001F1E6-\\U0001F1FF];"  // Emoji regional indicators
      "!!chain;"
      "!!forward;"
      "$CR $LF;"
      "$L ($L | $V | $LV | $LVT);"
      "($LV | $V) ($V | $T);"
      "($LVT | $T) $T;"
      "[^$Control $CR $LF] $Extend;"
      "[^$Control $CR $LF] $SpacingMark;"
      "$RI $RI / $RI;"
      "$RI $RI;"
      "$Hin0 $HinV $Hin1;"  // Devanagari Virama (forward)
      "$Ben0 $BenV $Ben1;"  // Bengali Virama (forward)
      "$Pan0 $PanV $Pan1;"  // Gurmukhi Virama (forward)
      "$Guj0 $GujV $Guj1;"  // Gujarati Virama (forward)
      "$Ori0 $OriV $Ori1;"  // Oriya Virama (forward)
      "$Tel0 $TelV $Tel1;"  // Telugu Virama (forward)
      "$Kan0 $KanV $Kan1;"  // Kannada Virama (forward)
      "$Mal0 $MalV $Mal1;"  // Malayalam Virama (forward)
      "!!reverse;"
      "$LF $CR;"
      "($L | $V | $LV | $LVT) $L;"
      "($V | $T) ($LV | $V);"
      "$T ($LVT | $T);"
      "$Extend      [^$Control $CR $LF];"
      "$SpacingMark [^$Control $CR $LF];"
      "$RI $RI / $RI $RI;"
      "$RI $RI;"
      "$Hin1 $HinV $Hin0;"  // Devanagari Virama (backward)
      "$Ben1 $BenV $Ben0;"  // Bengali Virama (backward)
      "$Pan1 $PanV $Pan0;"  // Gurmukhi Virama (backward)
      "$Guj1 $GujV $Guj0;"  // Gujarati Virama (backward)
      "$Ori1 $OriV $Ori0;"  // Oriya Virama (backward)
      "$Tel1 $TelV $Tel0;"  // Telugu Virama (backward)
      "$Kan1 $KanV $Kan0;"  // Kannada Virama (backward)
      "$Mal1 $MalV $Mal0;"  // Malayalam Virama (backward)
      "!!safe_reverse;"
      "!!safe_forward;";

  return setUpIteratorWithRules(kRules, string, length);
}

}  // namespace blink