File: filename_elider.cc

package info (click to toggle)
chromium 138.0.7204.183-1~deb12u1
links: PTS, VCS
area: main
in suites: bookworm-proposed-updates
size: 6,080,960 kB
sloc: cpp: 34,937,079; ansic: 7,176,967; javascript: 4,110,704; python: 1,419,954; asm: 946,768; xml: 739,971; pascal: 187,324; sh: 89,623; perl: 88,663; objc: 79,944; sql: 50,304; cs: 41,786; fortran: 24,137; makefile: 21,811; php: 13,980; tcl: 13,166; yacc: 8,925; ruby: 7,485; awk: 3,720; lisp: 3,096; lex: 1,327; ada: 727; jsp: 228; sed: 36
file content (249 lines) | stat: -rw-r--r-- 9,970 bytes
parent folder | download | duplicates (5)
// Copyright 2023 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "chrome/browser/ui/views/tabs/filename_elider.h"

#include <stddef.h>

#include <string>
#include <string_view>

#include "base/i18n/break_iterator.h"
#include "base/i18n/char_iterator.h"
#include "base/strings/string_util.h"
#include "ui/gfx/selection_model.h"
#include "ui/gfx/text_elider.h"
#include "ui/gfx/text_utils.h"

FilenameElider::FilenameElider(std::unique_ptr<gfx::RenderText> render_text)
    : render_text_(std::move(render_text)) {}

// Nothing needs to be released by hand; the compiler-generated destructor
// destroys the members.
FilenameElider::~FilenameElider() = default;

// Public entry point: formats `text` for a label occupying `display_rect`.
// The text is loaded into `render_text_`, measured by GetLineLengths(), and
// then assembled into the final string by ElideImpl().
std::u16string FilenameElider::Elide(std::u16string_view text,
                                     const gfx::Rect& display_rect) const {
  // The measuring and assembly steps both read the text back out of
  // `render_text_`, so it has to be set before either of them runs.
  render_text_->SetText(text);

  const LineLengths line_lengths = GetLineLengths(display_rect);
  return ElideImpl(line_lengths);
}

// static
size_t FilenameElider::FindImageDimensions(std::u16string_view text) {
  // We don't have regexes in Chrome, but we can still do a rough evaluation of
  // the line to see if it ends with the expected pattern:
  //
  // title[ (width×height)]
  //
  // We'll look for the open parenthesis, then the rest of the size. Note that
  // we don't have to worry about graphemes or combining characters because any
  // character that's not of the expected type means there is no dimension.

  // Find the start of the extension.
  const auto paren_pos = text.find_last_of(u'(');
  if (paren_pos == 0 || paren_pos == std::u16string::npos ||
      text[paren_pos - 1] != u' ') {
    return std::u16string::npos;
  }

  // Fast forward to the unicode character following the paren.
  base::i18n::UTF16CharIterator it(text.substr(paren_pos + 1));

  // Look for the image width.
  if (!base::IsAsciiDigit(it.get())) {
    return std::u16string::npos;
  }
  while (it.Advance() && base::IsAsciiDigit(it.get())) {
    // empty loop
  }

  // Look for the × character and the height.
  constexpr char16_t kMultiplicationSymbol = u'\u00D7';
  if (it.end() || it.get() != kMultiplicationSymbol || !it.Advance() ||
      !base::IsAsciiDigit(it.get())) {
    return std::u16string::npos;
  }
  while (it.Advance() && base::IsAsciiDigit(it.get())) {
    // empty loop
  }

  // Look for the closing parenthesis and make sure we've hit the end of the
  // string.
  if (it.end() || it.get() != u')') {
    return std::u16string::npos;
  }
  it.Advance();
  return it.end() ? paren_pos : std::u16string::npos;
}

// Measures how much of the text currently held by `render_text_` fits on each
// of the two lines of a label laid out in `display_rect`.
//
// In the returned LineLengths, `first` is the number of UTF-16 code units,
// counted from the start of the text, that fit when the text is truncated at
// the tail; `second` is the number of code units, counted from the end of the
// text, that fit behind a leading ellipsis when the head is elided. Both are
// the full text length when the text fits without elision, and both are zero
// when head elision leaves nothing to display.
//
// As a side effect this reconfigures `render_text_` (max lines, multiline,
// whitespace elision, display rect and elide behavior).
FilenameElider::LineLengths FilenameElider::GetLineLengths(
    const gfx::Rect& display_rect) const {
  // Measure against a single line of the given size.
  render_text_->SetMaxLines(0);
  render_text_->SetMultiline(false);
  render_text_->SetWhitespaceElision(true);
  render_text_->SetDisplayRect(display_rect);

  // Set our temporary RenderText to the unelided text and elide the start of
  // the string to give us a guess at where the second line of the label
  // should start.
  //
  // NOTE(review): `tentative_second_line` is only a view of the display text
  // and is presumably invalidated once the elide behavior is changed further
  // down; it is not used after that point. Keep it that way.
  render_text_->SetElideBehavior(gfx::ElideBehavior::ELIDE_HEAD);
  const std::u16string_view tentative_second_line =
      render_text_->GetDisplayText();

  // If there is no elision, then the text will fit on a single line and
  // there's nothing to do.
  const size_t length = render_text_->text().length();
  if (tentative_second_line == render_text_->text()) {
    return LineLengths(length, length);
  }

  // If there's not enough space to display even a single character, there is
  // also nothing to do; the result needs to be empty.
  if (tentative_second_line.empty()) {
    return LineLengths(0, 0);
  }

  LineLengths result;

  // Since we truncated, expect the string to start with ellipsis, then
  // calculate the length of the string sans ellipsis.
  DCHECK_EQ(gfx::kEllipsisUTF16[0], tentative_second_line[0]);

  // TODO(crbug.com/1239317): Elision is still a little flaky, so we'll make
  // sure we didn't stop in the middle of a grapheme. The +1 is to move past
  // the ellipsis which is not part of the original string.
  //
  // `pos` is the index in the original text at which the second line would
  // start. If it is not on a grapheme boundary it is moved forward, which
  // makes the second line shorter rather than longer.
  size_t pos = length - tentative_second_line.length() + 1;
  if (!render_text_->IsGraphemeBoundary(pos)) {
    pos = render_text_->IndexOfAdjacentGrapheme(pos, gfx::CURSOR_FORWARD);
  }
  result.second = length - pos;

  // Calculate the first line by aggressively truncating the text. This may
  // cut the string somewhere other than a word boundary, but for very long
  // filenames, it's probably best to fit as much of the name on the card as
  // possible, even if we sacrifice a small amount of readability.
  render_text_->SetElideBehavior(gfx::ElideBehavior::TRUNCATE);
  result.first = render_text_->GetDisplayText().length();

  // TODO(crbug.com/1239317): Handle the case where we ended up in the middle
  // of a grapheme. For now the end of the first line is moved backward to the
  // adjacent grapheme position, which makes the first line shorter.
  if (!render_text_->IsGraphemeBoundary(result.first)) {
    result.first = render_text_->IndexOfAdjacentGrapheme(result.first,
                                                         gfx::CURSOR_BACKWARD);
  }

  return result;
}

// Builds the final label string from the text held by `render_text_` and the
// per-line capacities in `line_lengths` (`first`: code units that fit at the
// start of the text; `second`: code units that fit at the end).
//
// Returns:
// - the text unchanged if either line can hold all of it;
// - an empty string if either line can hold nothing;
// - otherwise "<first line>\n<second line>", where the second line begins with
//   an ellipsis if the middle of the text had to be dropped, and is wrapped in
//   FSI/PDI marks when it begins with an ellipsis or at a preferred split
//   point (image dimensions or file extension).
std::u16string FilenameElider::ElideImpl(
    FilenameElider::LineLengths line_lengths) const {
  std::u16string_view text = render_text_->text();

  // Validate the inputs. All of these are base assumptions.
  DCHECK_LE(line_lengths.first, text.length());
  DCHECK_LE(line_lengths.second, text.length());
  DCHECK(render_text_->IsGraphemeBoundary(line_lengths.first));
  DCHECK(render_text_->IsGraphemeBoundary(text.length() - line_lengths.second));

  const size_t text_length = text.length();

  // Everything fits on one line: hand the text back untouched.
  if (line_lengths.first == text_length ||
      line_lengths.second == text_length) {
    return std::u16string(text);
  }

  // One of the lines cannot hold a single character: nothing to show.
  if (line_lengths.first == 0 || line_lengths.second == 0) {
    return std::u16string();
  }

  // The earliest index at which the second line may start and still reach the
  // end of the text.
  const size_t earliest_second_line_start = text_length - line_lengths.second;

  // Text that is too long for one line but fits on two usually produces an
  // overlap between what the first line can show and what the second line can
  // show. The second line never starts before the first line ends, so start
  // from the later of the two positions.
  size_t split = std::max(earliest_second_line_start, line_lengths.first);

  // If the split had to move beyond the end of the first line, part of the
  // text is dropped and the second line must open with an ellipsis.
  const bool needs_ellipsis = (split != line_lengths.first);

  // When nothing is dropped, any index in
  // [earliest_second_line_start, split] is a legal place to break. Within
  // that range some places are preferred, in this order:
  //   1. the start of trailing image dimensions, so they sit on line two;
  //   2. the start of the file extension.
  bool split_at_preferred_spot = false;
  if (!needs_ellipsis) {
    const auto in_overlap = [&](size_t candidate) {
      return candidate != std::u16string::npos &&
             candidate >= earliest_second_line_start && candidate <= split;
    };

    if (const size_t dimensions_pos = FindImageDimensions(text);
        in_overlap(dimensions_pos)) {
      split = dimensions_pos;
      split_at_preferred_spot = true;
    } else if (const size_t extension_pos = text.find_last_of(u'.');
               in_overlap(extension_pos)) {
      split = extension_pos;
      split_at_preferred_spot = true;
    }
  }

  // TODO(dfried): possibly handle the case where we chop a section with bidi
  // delimiters out or split it between lines.

  // Unless the break was placed at a preferred spot, skip any whitespace at
  // the start of the second line (it looks weird).
  if (!split_at_preferred_spot) {
    split =
        gfx::FindValidBoundaryAfter(text, split, /*trim_whitespace =*/true);
  }

  // First line: everything up to `split` or up to the first line's capacity,
  // whichever comes sooner.
  const size_t first_line_end = std::min(line_lengths.first, split);
  std::u16string result;
  result.append(text.substr(0, first_line_end));
  result += u'\n';

  // When the second line starts at a preferred split point or with an
  // ellipsis, hint that the directionality of the text might change by using
  // an FSI mark. Allowing the renderer to re-infer RTL-ness produces much
  // better results in text rendering when an RTL filename has an ASCII
  // extension.
  //
  // TODO(dfried): Currently we do put an FSI before an ellipsis; this
  // results in the ellipsis being placed with the text that immediately
  // follows it (making the point of elision more obvious). If the text
  // following the cut is LTR it goes on the left, and if the text is RTL it
  // goes on the right. Reconsider if/how we should set text direction
  // following an ellipsis:
  // - No FSI would cause the ellipsis to align with the preceding rather
  //   than the following text. It would provide a bit more visual continuity
  //   between lines, but might be confusing as to where the text picks back
  //   up (as the next character might be on the opposite side of the line).
  // - We could preserve elided directionality markers, but they could end up
  //   aligning the ellipsis with text that is not present at all on the
  //   label.
  // - We could also force direction to match the start of the first line for
  //   consistency but that could result in an ellipsis that matches neither
  //   the preceding nor following text.
  //
  // TODO(dfried): move these declarations to rtl.h alongside e.g.
  // base::i18n::kRightToLeftMark
  constexpr char16_t kFirstStrongIsolateMark = u'\u2068';
  constexpr char16_t kPopDirectionalIsolateMark = u'\u2069';
  const bool isolate_second_line = split_at_preferred_spot || needs_ellipsis;

  if (isolate_second_line) {
    result.push_back(kFirstStrongIsolateMark);
  }
  if (needs_ellipsis) {
    result.push_back(gfx::kEllipsisUTF16[0]);
  }
  result.append(text.substr(split));
  // An FSI, if one was added, is closed with a matching PDI.
  if (isolate_second_line) {
    result.push_back(kPopDirectionalIsolateMark);
  }
  return result;
}