File: text_fragment.cc

package info (click to toggle)
chromium 138.0.7204.183-1~deb12u1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm-proposed-updates
  • size: 6,080,960 kB
  • sloc: cpp: 34,937,079; ansic: 7,176,967; javascript: 4,110,704; python: 1,419,954; asm: 946,768; xml: 739,971; pascal: 187,324; sh: 89,623; perl: 88,663; objc: 79,944; sql: 50,304; cs: 41,786; fortran: 24,137; makefile: 21,811; php: 13,980; tcl: 13,166; yacc: 8,925; ruby: 7,485; awk: 3,720; lisp: 3,096; lex: 1,327; ada: 727; jsp: 228; sed: 36
file content (192 lines) | stat: -rw-r--r-- 6,280 bytes parent folder | download | duplicates (6)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
// Copyright 2020 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "components/shared_highlighting/core/common/text_fragment.h"

#include <sstream>

#include "base/strings/escape.h"
#include "base/strings/string_split.h"
#include "base/strings/string_util.h"
#include "components/shared_highlighting/core/common/fragment_directives_constants.h"

namespace {

// Encodes |str| so that it can be placed inside a text fragment in a URL.
std::string Escape(const std::string& str) {
  // Two passes: first the generic query-param escaping, then every hyphen is
  // rewritten to "%2D". Hyphens need the extra step because the fragment
  // syntax uses them to mark prefix/suffix components.
  std::string result;
  base::ReplaceChars(base::EscapeQueryParamValue(str, /*usePlus=*/false), "-",
                     "%2D", &result);
  return result;
}

// Decodes percent-escapes in |str|, which may originate from a URL. Yields
// nullopt when the fragment can't be safely unescaped, i.e. when the decoded
// bytes are not valid UTF-8.
std::optional<std::string> Unescape(std::string_view str) {
  std::string decoded = base::UnescapeBinaryURLComponent(str);
  if (base::IsStringUTF8(decoded)) {
    return decoded;
  }
  return std::nullopt;
}

// Returns true only if |str| points at a string with at least one character;
// a null pointer counts as "no value".
bool HasValue(const std::string* str) {
  if (str == nullptr) {
    return false;
  }
  return !str->empty();
}

// Returns a copy of |*str| when it holds a value (see HasValue()), and an
// empty string otherwise, including when |str| is null.
std::string ValueOrDefault(const std::string* str) {
  if (!HasValue(str)) {
    return std::string();
  }
  return *str;
}

}  // namespace

namespace shared_highlighting {

// Convenience constructor for a fragment that only has a text start; it
// delegates to the four-argument constructor with the remaining components
// left empty.
TextFragment::TextFragment(const std::string& text_start)
    : TextFragment(text_start,
                   /*text_end=*/std::string(),
                   /*prefix=*/std::string(),
                   /*suffix=*/std::string()) {}

// Full constructor: initializes the corresponding members from the four
// components. Components left empty (|text_end|, |prefix|, |suffix|) are
// skipped by ToEscapedString() and ToValue() when serializing.
TextFragment::TextFragment(const std::string& text_start,
                           const std::string& text_end,
                           const std::string& prefix,
                           const std::string& suffix)
    : text_start_(text_start),
      text_end_(text_end),
      prefix_(prefix),
      suffix_(suffix) {}

// Copy constructor, defined out of line with the compiler-generated behavior.
TextFragment::TextFragment(const TextFragment& other) = default;

// Copy assignment, defined out of line with the compiler-generated behavior.
TextFragment& TextFragment::operator=(const TextFragment& other) = default;

// Destructor, defined out of line with the compiler-generated behavior.
TextFragment::~TextFragment() = default;

// static
// Parses the escaped (URL) form of a text fragment. Returns nullopt when the
// input is malformed or when any component fails to unescape to valid UTF-8.
std::optional<TextFragment> TextFragment::FromEscapedString(
    std::string_view escaped_string) {
  // Expected syntax: [prefix-,]textStart[,textEnd][,-suffix]
  // Only textStart is mandatory. Terms are comma-separated; the prefix ends
  // with a hyphen and the suffix begins with one. Commas, ampersands and
  // hyphens that belong to a term's value have to be URL-encoded.
  constexpr std::string_view kPrefixMarker = "-,";
  constexpr std::string_view kSuffixMarker = ",-";

  std::string_view remaining = escaped_string;

  // Peel off the optional prefix: everything before the first "-,". The
  // marker itself is dropped and does not become part of the prefix value.
  std::string_view prefix;
  if (const size_t marker_pos = remaining.find(kPrefixMarker);
      marker_pos != std::string_view::npos) {
    prefix = remaining.substr(0, marker_pos);
    remaining.remove_prefix(marker_pos + kPrefixMarker.size());
  }

  // Peel off the optional suffix: everything after the last ",-" in what is
  // left. Again the marker itself is dropped.
  std::string_view suffix;
  if (const size_t marker_pos = remaining.rfind(kSuffixMarker);
      marker_pos != std::string_view::npos) {
    suffix = remaining.substr(marker_pos + kSuffixMarker.size());
    remaining = remaining.substr(0, marker_pos);
  }

  // Whatever remains must be "textStart" or "textStart,textEnd".
  const std::vector<std::string_view> pieces = base::SplitStringPiece(
      remaining, ",", base::KEEP_WHITESPACE, base::SPLIT_WANT_NONEMPTY);

  const bool piece_count_ok = pieces.size() == 1 || pieces.size() == 2;
  if (!piece_count_ok || pieces.front().empty()) {
    // Malformed: nothing usable is left for textStart, or there are too many
    // terms.
    return std::nullopt;
  }

  const std::string_view text_start = pieces.front();
  std::string_view text_end;
  if (pieces.size() == 2) {
    text_end = pieces.back();
  }

  // A raw '&', '-' or ',' inside a term means it was not URL-encoded as
  // required, so the whole fragment is rejected.
  const auto has_reserved_char = [](std::string_view term) {
    return term.find_first_of("&-,") != std::string_view::npos;
  };
  if (has_reserved_char(prefix) || has_reserved_char(text_start) ||
      has_reserved_char(text_end) || has_reserved_char(suffix)) {
    return std::nullopt;
  }

  const std::optional<std::string> decoded_start = Unescape(text_start);
  const std::optional<std::string> decoded_end = Unescape(text_end);
  const std::optional<std::string> decoded_prefix = Unescape(prefix);
  const std::optional<std::string> decoded_suffix = Unescape(suffix);

  const bool all_decoded = decoded_start.has_value() &&
                           decoded_end.has_value() &&
                           decoded_prefix.has_value() &&
                           decoded_suffix.has_value();
  if (!all_decoded) {
    return std::nullopt;
  }

  return TextFragment(*decoded_start, *decoded_end, *decoded_prefix,
                      *decoded_suffix);
}

// static
// Builds a TextFragment from a dictionary value. Returns nullopt if |value|
// is null, is not a dictionary, or has no non-empty text start entry.
std::optional<TextFragment> TextFragment::FromValue(const base::Value* value) {
  if (value == nullptr || !value->is_dict()) {
    return std::nullopt;
  }

  const base::Value::Dict& fields = value->GetDict();

  // The text start is the one mandatory component, so check it first.
  const std::string* const start = fields.FindString(kFragmentTextStartKey);
  if (!HasValue(start)) {
    return std::nullopt;
  }

  // The remaining components are optional; missing or empty entries turn
  // into empty strings.
  return TextFragment(*start,
                      ValueOrDefault(fields.FindString(kFragmentTextEndKey)),
                      ValueOrDefault(fields.FindString(kFragmentPrefixKey)),
                      ValueOrDefault(fields.FindString(kFragmentSuffixKey)));
}

// Serializes this fragment into its escaped URL form:
//   [prefix-,]textStart[,textEnd][,-suffix]
// Each component is passed through Escape(). When |format| is
// EscapedStringFormat::kWithTextDirective the result is preceded by
// kTextDirectiveParameterName. Returns an empty string if the fragment has no
// text start, since that component is required.
std::string TextFragment::ToEscapedString(EscapedStringFormat format) const {
  // Plain std::string appends are used instead of a std::stringstream: the
  // output is a simple concatenation, so no stream formatting is needed.
  std::string result;
  if (text_start_.empty()) {
    return result;
  }

  if (format == EscapedStringFormat::kWithTextDirective) {
    result += kTextDirectiveParameterName;
  }

  // The prefix is terminated by "-," so that parsers can tell it apart from
  // the text start.
  if (!prefix_.empty()) {
    result += Escape(prefix_);
    result += "-,";
  }

  result += Escape(text_start_);

  if (!text_end_.empty()) {
    result += ",";
    result += Escape(text_end_);
  }

  // The suffix is introduced by ",-".
  if (!suffix_.empty()) {
    result += ",-";
    result += Escape(suffix_);
  }

  return result;
}

// Converts this fragment into a dictionary-typed base::Value. The text start
// is always written; prefix, text end and suffix are written only when they
// are non-empty.
base::Value TextFragment::ToValue() const {
  base::Value::Dict result;

  if (!prefix_.empty()) {
    result.Set(kFragmentPrefixKey, prefix_);
  }

  result.Set(kFragmentTextStartKey, text_start_);

  if (!text_end_.empty()) {
    result.Set(kFragmentTextEndKey, text_end_);
  }

  if (!suffix_.empty()) {
    result.Set(kFragmentSuffixKey, suffix_);
  }

  return base::Value(std::move(result));
}

}  // namespace shared_highlighting