File: string_cleaning.h

package info (click to toggle)
chromium 139.0.7258.127-1
  • links: PTS, VCS
  • area: main
  • in suites:
  • size: 6,122,068 kB
  • sloc: cpp: 35,100,771; ansic: 7,163,530; javascript: 4,103,002; python: 1,436,920; asm: 946,517; xml: 746,709; pascal: 187,653; perl: 88,691; sh: 88,436; objc: 79,953; sql: 51,488; cs: 44,583; fortran: 24,137; makefile: 22,147; tcl: 15,277; php: 13,980; yacc: 8,984; ruby: 7,485; awk: 3,720; lisp: 3,096; lex: 1,327; ada: 727; jsp: 228; sed: 36
file content (46 lines) | stat: -rw-r--r-- 1,977 bytes parent folder | download | duplicates (5)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
// Copyright 2025 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef COMPONENTS_OMNIBOX_COMMON_STRING_CLEANING_H_
#define COMPONENTS_OMNIBOX_COMMON_STRING_CLEANING_H_

#include <string>

#include "base/strings/utf_offset_string_conversions.h"

class GURL;

namespace string_cleaning {

// Truncates an overly-long URL, unescapes it and interprets the characters as
// UTF-8 (both via `url_formatter::FormatUrl()`), and lower-cases it, returning
// the result as a new string.
//
// `adjustments`, if non-null, is set to reflect the transformations the URL
// spec underwent to become the return value.  If a caller computes offsets
// (e.g., for the position of matched text) in this cleaned-up string, it can
// use `adjustments` to calculate the location of these offsets in the original
// string (via `base::OffsetAdjuster::UnadjustOffsets()`).  This is useful if
// later the original string gets formatted in a different way for displaying.
// In this case, knowing the offsets in the original string will allow them to
// be properly translated to offsets in the newly-formatted string.
//
// The unescaping done by this function makes it possible to match substrings
// that were originally escaped for navigation; for example, if the user
// searched for "a&p", the query would be escaped as "a%26p", so without
// unescaping, an input string of "a&p" would no longer match this URL.  Note
// that the resulting unescaped URL may not be directly navigable (which is
// why it was escaped to begin with).
//
// `gurl` must be a valid URL.
std::u16string CleanUpUrlForMatching(
    const GURL& gurl,
    base::OffsetAdjuster::Adjustments* adjustments);

// Returns a lower-cased copy of `title`, possibly truncated if the original
// title is overly long. `title` itself is not modified.
std::u16string CleanUpTitleForMatching(const std::u16string& title);

}  // namespace string_cleaning

#endif  // COMPONENTS_OMNIBOX_COMMON_STRING_CLEANING_H_