1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102
|
// Copyright 2017 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "components/omnibox/browser/tailored_word_break_iterator.h"
#include <string>
#include <string_view>
#include "base/check.h"
#include "base/i18n/break_iterator.h"
using base::i18n::BreakIterator;
// Builds a word-mode `BreakIterator` over `str` and sets up the extra break
// characters this iterator splits words on.
TailoredWordBreakIterator::TailoredWordBreakIterator(std::u16string_view str)
    : BreakIterator(str, BreakIterator::BREAK_WORD),
      // Offsets of the current piece within `special_word_`; both start at 0.
      prev_(0),
      pos_(0),
      // Characters that split a word and are themselves reported as words.
      word_breaks_(u"0123456789"),
      // Characters that split a word but are not reported as words.
      non_word_breaks_(u"_"),
      // Union of the two sets above. NOTE(review): relies on both sets being
      // initialized first, i.e. declared before `all_breaks_` in the header.
      all_breaks_(word_breaks_ + non_word_breaks_) {}
// Defaulted out of line; this class performs no explicit cleanup of its own.
TailoredWordBreakIterator::~TailoredWordBreakIterator() = default;
// Moves to the next token. If a special word (a word containing one of
// `all_breaks_`) is currently being split, its next piece is produced first;
// otherwise the underlying `BreakIterator` is advanced. Returns false when the
// underlying iterator cannot advance any further.
bool TailoredWordBreakIterator::Advance() {
  // Pieces of the current special word remain: hand out the next one.
  if (HasSpecialWord() && AdvanceInSpecialWord())
    return true;

  if (!BreakIterator::Advance())
    return false;

  // A fresh outer token was produced; discard any sub-word state.
  special_word_ = std::u16string_view();
  prev_ = 0;
  pos_ = 0;

  if (IsWord()) {
    const std::u16string_view token = BreakIterator::GetString();
    if (token.find_first_of(all_breaks_) != std::u16string_view::npos) {
      // The word contains a tailored break: start splitting it and position
      // on its first piece.
      special_word_ = token;
      AdvanceInSpecialWord();
    }
  }
  return true;
}
// Reports whether the current token is a word. Inside a special word, a piece
// is a word unless its first character is one of `non_word_breaks_`; in every
// other case the decision is delegated to `BreakIterator::IsWord()`.
bool TailoredWordBreakIterator::IsWord() const {
  if (!HasSpecialWord())
    return BreakIterator::IsWord();

  const std::u16string_view piece = GetString();
  if (piece.empty())
    return BreakIterator::IsWord();

  return non_word_breaks_.find(piece.front()) == std::u16string::npos;
}
// Returns the text of the current token: the active piece
// [`prev_`, `pos_`) of `special_word_` while one is being split, otherwise
// whatever the underlying `BreakIterator` reports.
std::u16string_view TailoredWordBreakIterator::GetString() const {
  if (special_word_.empty())
    return BreakIterator::GetString();
  return special_word_.substr(prev_, pos_ - prev_);
}
// Returns the start offset of the current token: the underlying iterator's
// `prev()` shifted by `prev_`, the piece's start within the special word
// (`prev_` is 0 when no special word is being split).
size_t TailoredWordBreakIterator::prev() const {
  const size_t outer_start = BreakIterator::prev();
  return outer_start + prev_;
}
size_t TailoredWordBreakIterator::pos() const {
return BreakIterator::pos() + pos_;
}
// True while a special word is being split, i.e. `special_word_` is non-empty.
bool TailoredWordBreakIterator::HasSpecialWord() const {
  const bool splitting = !special_word_.empty();
  return splitting;
}
// Steps to the next piece of `special_word_`, updating `prev_`/`pos_` to its
// bounds. Returns true if a piece was produced. Returns false, after clearing
// the special-word state, when the word has been fully consumed so the caller
// can advance the outer `BreakIterator` instead.
bool TailoredWordBreakIterator::AdvanceInSpecialWord() {
  DCHECK(HasSpecialWord());

  const size_t word_length = special_word_.size();
  if (pos_ == word_length) {
    // Nothing left in this special word; reset and defer to the caller.
    special_word_ = std::u16string_view();
    prev_ = 0;
    pos_ = 0;
    return false;
  }

  // The new piece starts where the previous one ended.
  prev_ = pos_;
  const auto first_char = special_word_[prev_];

  size_t piece_end;
  if (non_word_breaks_.find(first_char) != std::u16string::npos) {
    // A non-word word-break (e.g. '_') is emitted one character at a time,
    // rather than as a whole run, to be consistent with how `BreakIterator`
    // handles other symbols.
    piece_end = prev_ + 1;
  } else if (word_breaks_.find(first_char) != std::u16string::npos) {
    // A word word-break (e.g. a digit): consume the entire run of them.
    piece_end = special_word_.find_first_not_of(word_breaks_, prev_ + 1);
  } else {
    // An ordinary character: consume up to the next word-break of any kind.
    piece_end = special_word_.find_first_of(all_breaks_, prev_ + 1);
  }

  // No terminator found means the piece runs to the end of the word.
  pos_ = (piece_end == std::u16string::npos) ? word_length : piece_end;
  return true;
}
|