File: string_cleaning.h

package info (click to toggle)
chromium 138.0.7204.157-1
  • links: PTS, VCS
  • area: main
  • in suites: trixie
  • size: 6,071,864 kB
  • sloc: cpp: 34,936,859; ansic: 7,176,967; javascript: 4,110,704; python: 1,419,953; asm: 946,768; xml: 739,967; pascal: 187,324; sh: 89,623; perl: 88,663; objc: 79,944; sql: 50,304; cs: 41,786; fortran: 24,137; makefile: 21,806; php: 13,980; tcl: 13,166; yacc: 8,925; ruby: 7,485; awk: 3,720; lisp: 3,096; lex: 1,327; ada: 727; jsp: 228; sed: 36
file content (46 lines) | stat: -rw-r--r-- 1,977 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
// Copyright 2025 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef COMPONENTS_OMNIBOX_COMMON_STRING_CLEANING_H_
#define COMPONENTS_OMNIBOX_COMMON_STRING_CLEANING_H_

#include <string>

#include "base/strings/utf_offset_string_conversions.h"

class GURL;

namespace string_cleaning {

// Prepares a URL for substring matching: truncates an overly long URL,
// unescapes it and interprets the characters as UTF-8 (both via
// `url_formatter::FormatUrl()`), lower-cases it, and returns the result.
//
// `adjustments`, if non-null, is set to reflect the transformations the URL
// spec underwent to become the return value. If a caller computes offsets
// (e.g., for the position of matched text) in this cleaned-up string, it can
// use `adjustments` to calculate the location of these offsets in the
// original string (via `base::OffsetAdjuster::UnadjustOffsets()`). This is
// useful if the original string later gets formatted in a different way for
// display: knowing the offsets in the original string allows them to be
// properly translated to offsets in the newly formatted string.
//
// The unescaping done by this function makes it possible to match substrings
// that were originally escaped for navigation; for example, if the user
// searched for "a&p", the query would be escaped as "a%26p", so without
// unescaping, an input string of "a&p" would no longer match this URL. Note
// that the resulting unescaped URL may not be directly navigable (which is
// why it was escaped to begin with).
//
// `gurl` must be a valid URL.
std::u16string CleanUpUrlForMatching(
    const GURL& gurl,
    base::OffsetAdjuster::Adjustments* adjustments);

// Returns the lower-cased `title`, possibly truncated if the original title is
// overly long.
std::u16string CleanUpTitleForMatching(const std::u16string& title);

}  // namespace string_cleaning

#endif  // COMPONENTS_OMNIBOX_COMMON_STRING_CLEANING_H_