File: filename_elider.cc

package info (click to toggle)
chromium 138.0.7204.183-1~deb12u1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm-proposed-updates
  • size: 6,080,960 kB
  • sloc: cpp: 34,937,079; ansic: 7,176,967; javascript: 4,110,704; python: 1,419,954; asm: 946,768; xml: 739,971; pascal: 187,324; sh: 89,623; perl: 88,663; objc: 79,944; sql: 50,304; cs: 41,786; fortran: 24,137; makefile: 21,811; php: 13,980; tcl: 13,166; yacc: 8,925; ruby: 7,485; awk: 3,720; lisp: 3,096; lex: 1,327; ada: 727; jsp: 228; sed: 36
file content (249 lines) | stat: -rw-r--r-- 9,970 bytes parent folder | download | duplicates (5)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
// Copyright 2023 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "chrome/browser/ui/views/tabs/filename_elider.h"

#include <stddef.h>

#include <string>
#include <string_view>

#include "base/i18n/break_iterator.h"
#include "base/i18n/char_iterator.h"
#include "base/strings/string_util.h"
#include "ui/gfx/selection_model.h"
#include "ui/gfx/text_elider.h"
#include "ui/gfx/text_utils.h"

FilenameElider::FilenameElider(std::unique_ptr<gfx::RenderText> render_text)
    : render_text_(std::move(render_text)) {}

// Defaulted destructor, defined out of line in this .cc file rather than in
// the header.
FilenameElider::~FilenameElider() = default;

// Elides `text` so that it can be shown on at most two lines inside
// `display_rect`.
//
// The text is first handed to the internal RenderText, which is then used to
// measure how many characters fit on the first and second lines. The returned
// string is assembled from those measurements by ElideImpl().
std::u16string FilenameElider::Elide(std::u16string_view text,
                                     const gfx::Rect& display_rect) const {
  render_text_->SetText(text);
  const LineLengths measured_lengths = GetLineLengths(display_rect);
  return ElideImpl(measured_lengths);
}

// static
size_t FilenameElider::FindImageDimensions(std::u16string_view text) {
  // We don't have regexes in Chrome, but we can still do a rough evaluation of
  // the line to see if it ends with the expected pattern:
  //
  // title[ (width×height)]
  //
  // We'll look for the open parenthesis, then the rest of the size. Note that
  // we don't have to worry about graphemes or combining characters because any
  // character that's not of the expected type means there is no dimension.

  // Find the start of the extension.
  const auto paren_pos = text.find_last_of(u'(');
  if (paren_pos == 0 || paren_pos == std::u16string::npos ||
      text[paren_pos - 1] != u' ') {
    return std::u16string::npos;
  }

  // Fast forward to the unicode character following the paren.
  base::i18n::UTF16CharIterator it(text.substr(paren_pos + 1));

  // Look for the image width.
  if (!base::IsAsciiDigit(it.get())) {
    return std::u16string::npos;
  }
  while (it.Advance() && base::IsAsciiDigit(it.get())) {
    // empty loop
  }

  // Look for the × character and the height.
  constexpr char16_t kMultiplicationSymbol = u'\u00D7';
  if (it.end() || it.get() != kMultiplicationSymbol || !it.Advance() ||
      !base::IsAsciiDigit(it.get())) {
    return std::u16string::npos;
  }
  while (it.Advance() && base::IsAsciiDigit(it.get())) {
    // empty loop
  }

  // Look for the closing parenthesis and make sure we've hit the end of the
  // string.
  if (it.end() || it.get() != u')') {
    return std::u16string::npos;
  }
  it.Advance();
  return it.end() ? paren_pos : std::u16string::npos;
}

// Measures how much of the text currently set on `render_text_` fits on each
// of two lines within `display_rect`.
//
// In the returned pair, `first` is the number of code units from the start of
// the text that fit on the first line, and `second` is the number of code
// units from the end of the text that fit on the second line (leaving room for
// a leading ellipsis). Special cases:
//   - both values equal the text length when the whole text fits on one line;
//   - both values are zero when not even a single character fits.
FilenameElider::LineLengths FilenameElider::GetLineLengths(
    const gfx::Rect& display_rect) const {
  // Configure the RenderText as a single line within the target rect.
  render_text_->SetMaxLines(0);
  render_text_->SetMultiline(false);
  render_text_->SetWhitespaceElision(true);
  render_text_->SetDisplayRect(display_rect);

  // Eliding the head of the unelided text gives a first guess at what the
  // second line of the label would contain.
  //
  // NOTE(review): `head_elided` presumably views storage owned by
  // `render_text_`; it is deliberately not used after the elide behavior is
  // changed further down.
  render_text_->SetElideBehavior(gfx::ElideBehavior::ELIDE_HEAD);
  const std::u16string_view head_elided = render_text_->GetDisplayText();
  const size_t full_length = render_text_->text().length();

  // Nothing was elided: everything fits on one line.
  if (head_elided == render_text_->text()) {
    return LineLengths(full_length, full_length);
  }

  // Not even one character can be shown: the result must be empty.
  if (head_elided.empty()) {
    return LineLengths(0, 0);
  }

  // The text was cut, so the display text is expected to begin with an
  // ellipsis.
  DCHECK_EQ(gfx::kEllipsisUTF16[0], head_elided[0]);

  // TODO(crbug.com/1239317): Elision is still a little flaky, so make sure the
  // cut did not land in the middle of a grapheme. The +1 accounts for the
  // ellipsis, which is not part of the original string.
  size_t second_line_start = full_length - head_elided.length() + 1;
  if (!render_text_->IsGraphemeBoundary(second_line_start)) {
    second_line_start = render_text_->IndexOfAdjacentGrapheme(
        second_line_start, gfx::CURSOR_FORWARD);
  }
  const size_t second_line_length = full_length - second_line_start;

  // The first line is obtained by plain truncation. That may cut somewhere
  // other than a word boundary, but for very long filenames showing as much of
  // the name as possible is preferred over a small gain in readability.
  render_text_->SetElideBehavior(gfx::ElideBehavior::TRUNCATE);
  size_t first_line_length = render_text_->GetDisplayText().length();

  // TODO(crbug.com/1239317): Handle the case where we ended up in the middle
  // of a grapheme.
  if (!render_text_->IsGraphemeBoundary(first_line_length)) {
    first_line_length = render_text_->IndexOfAdjacentGrapheme(
        first_line_length, gfx::CURSOR_BACKWARD);
  }

  return LineLengths(first_line_length, second_line_length);
}

// Builds the elided string for the text currently set on `render_text_`, given
// how many code units fit on the first line (`line_lengths.first`, counted
// from the start) and on the second line (`line_lengths.second`, counted from
// the end).
//
// Returns:
//   - the text unchanged if either length covers the whole text;
//   - an empty string if either length is zero;
//   - otherwise "<first line>\n<second line>", where the second line may be
//     prefixed with an ellipsis and wrapped in FSI/PDI marks (see below).
std::u16string FilenameElider::ElideImpl(
    FilenameElider::LineLengths line_lengths) const {
  const std::u16string_view full_text = render_text_->text();
  const size_t text_length = full_text.length();
  const size_t first_line_length = line_lengths.first;
  const size_t second_line_length = line_lengths.second;

  // Base assumptions about the inputs.
  DCHECK_LE(first_line_length, text_length);
  DCHECK_LE(second_line_length, text_length);
  DCHECK(render_text_->IsGraphemeBoundary(first_line_length));
  DCHECK(render_text_->IsGraphemeBoundary(text_length - second_line_length));

  // Everything fits on a single line: return the text as-is.
  if (first_line_length == text_length || second_line_length == text_length) {
    return std::u16string(full_text);
  }

  // One of the lines cannot hold any characters: return an empty string.
  if (first_line_length == 0 || second_line_length == 0) {
    return std::u16string();
  }

  // Decide where the second line starts. Text that is too long for one line
  // but fits on two tends to produce overlapping first/second line ranges, so
  // start from the later of "end of the first line" and "earliest start of the
  // second line".
  const size_t second_line_start = text_length - second_line_length;
  size_t split = std::max(second_line_start, first_line_length);

  // When the split coincides with the end of the first line, the two lines
  // together cover the whole text. Otherwise some text is dropped and the
  // second line needs an ellipsis.
  const bool lines_cover_text = (split == first_line_length);

  // If the candidate ranges overlap there is freedom in where to break, and a
  // few places are preferred. Only positions inside
  // [second_line_start, split] are eligible. (`split >= second_line_start`
  // holds by construction, so it is not tested separately.)
  bool split_at_preferred_spot = false;
  if (lines_cover_text) {
    const size_t overlap_end = split;
    const auto in_overlap = [second_line_start, overlap_end](size_t pos) {
      return pos != std::u16string::npos && pos >= second_line_start &&
             pos <= overlap_end;
    };

    // First choice: put image dimensions on the second line. Second choice:
    // break at the start of the file extension.
    size_t preferred = FindImageDimensions(full_text);
    if (!in_overlap(preferred)) {
      preferred = full_text.find_last_of(u'.');
    }
    if (in_overlap(preferred)) {
      split = preferred;
      split_at_preferred_spot = true;
    }
  }

  // TODO(dfried): possibly handle the case where we chop a section with bidi
  // delimiters out or split it between lines.

  // If the break was not moved to one of the preferred spots, skip whitespace
  // at the start of the second line (it looks weird).
  if (!split_at_preferred_spot) {
    split = gfx::FindValidBoundaryAfter(full_text, split,
                                        /*trim_whitespace =*/true);
  }

  // When the second line starts at a preferred spot (e.g. a file extension),
  // hint that the directionality of the text might change by using an FSI
  // mark. Letting the renderer re-infer RTL-ness produces much better results
  // when an RTL filename has an ASCII extension.
  //
  // TODO(dfried): Currently we do put an FSI before an ellipsis; this results
  // in the ellipsis being placed with the text that immediately follows it
  // (making the point of elision more obvious). If the text following the cut
  // is LTR it goes on the left, and if the text is RTL it goes on the right.
  // Reconsider if/how we should set text direction following an ellipsis:
  // - No FSI would cause the ellipsis to align with the preceding rather than
  //   the following text. It would provide a bit more visual continuity
  //   between lines, but might be confusing as to where the text picks back up
  //   (as the next character might be on the opposite side of the line).
  // - We could preserve elided directionality markers, but they could end up
  //   aligning the ellipsis with text that is not present at all on the
  //   label.
  // - We could also force direction to match the start of the first line for
  //   consistency but that could result in an ellipsis that matches neither
  //   the preceding nor following text.
  //
  // TODO(dfried): move these declarations to rtl.h alongside e.g.
  // base::i18n::kRightToLeftMark
  constexpr char16_t kFirstStrongIsolateMark = u'\u2068';
  constexpr char16_t kPopDirectionalIsolateMark = u'\u2069';
  const bool wrap_in_isolate = split_at_preferred_spot || !lines_cover_text;

  // First line: everything up to the split or the end of the first line,
  // whichever comes sooner.
  std::u16string result(
      full_text.substr(0, std::min(first_line_length, split)));
  result += u'\n';

  // Second line: optional FSI, optional ellipsis, the tail of the text, and a
  // PDI to close the FSI if one was opened.
  if (wrap_in_isolate) {
    result.push_back(kFirstStrongIsolateMark);
  }
  if (!lines_cover_text) {
    result += gfx::kEllipsisUTF16[0];
  }
  result.append(full_text.substr(split));
  if (wrap_in_isolate) {
    result.push_back(kPopDirectionalIsolateMark);
  }
  return result;
}