File: url_util.h

package info (click to toggle)
chromium 120.0.6099.224-1~deb11u1
links: PTS, VCS
area: main
in suites: bullseye
size: 6,112,112 kB
sloc: cpp: 32,907,025; ansic: 8,148,123; javascript: 3,679,536; python: 2,031,248; asm: 959,718; java: 804,675; xml: 617,256; sh: 111,417; objc: 100,835; perl: 88,443; cs: 53,032; makefile: 29,579; fortran: 24,137; php: 21,162; tcl: 21,147; sql: 20,809; ruby: 17,735; pascal: 12,864; yacc: 8,045; lisp: 3,388; lex: 1,323; ada: 727; awk: 329; jsp: 267; csh: 117; exp: 43; sed: 37
file content (328 lines) | stat: -rw-r--r-- 14,540 bytes
parent folder | download | duplicates (2)
// Copyright 2013 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef URL_URL_UTIL_H_
#define URL_URL_UTIL_H_

#include <memory>
#include <string>
#include <string_view>
#include <vector>

#include "base/component_export.h"
#include "url/third_party/mozilla/url_parse.h"
#include "url/url_canon.h"
#include "url/url_constants.h"

namespace url {

// Init ------------------------------------------------------------------------

// Used for tests that need to reset schemes. Note that this can only be used
// in conjunction with ScopedSchemeRegistryForTests.
COMPONENT_EXPORT(URL) void ClearSchemesForTests();

class ScopedSchemeRegistryInternal;

// Stores the SchemeRegistry upon creation, allowing tests to modify a copy of
// it, and restores the original SchemeRegistry when deleted.
class COMPONENT_EXPORT(URL) ScopedSchemeRegistryForTests {
 public:
  ScopedSchemeRegistryForTests();
  ~ScopedSchemeRegistryForTests();

 private:
  std::unique_ptr<ScopedSchemeRegistryInternal> internal_;
};

// Schemes ---------------------------------------------------------------------

// Changes the behavior of SchemeHostPort / Origin to allow non-standard schemes
// to be specified, instead of canonicalizing them to an invalid SchemeHostPort
// or opaque Origin, respectively. This is used for Android WebView backwards
// compatibility, which allows the use of custom schemes: content hosted in
// Android WebView assumes that one URL with a non-standard scheme will be
// same-origin to another URL with the same non-standard scheme.
//
// Not thread-safe.
COMPONENT_EXPORT(URL) void EnableNonStandardSchemesForAndroidWebView();

// Whether or not SchemeHostPort and Origin allow non-standard schemes.
COMPONENT_EXPORT(URL) bool AllowNonStandardSchemesForAndroidWebView();

// The following Add*Scheme method are not threadsafe and can not be called
// concurrently with any other url_util function. They will assert if the lists
// of schemes have been locked (see LockSchemeRegistries), or used.

// Adds an application-defined scheme to the internal list of "standard-format"
// URL schemes. A standard-format scheme adheres to what RFC 3986 calls "generic
// URI syntax" (https://tools.ietf.org/html/rfc3986#section-3).

COMPONENT_EXPORT(URL)
void AddStandardScheme(const char* new_scheme, SchemeType scheme_type);

// Returns the list of schemes registered for "standard" URLs.  Note, this
// should not be used if you just need to check if your protocol is standard
// or not.  Instead use the IsStandard() function above as its much more
// efficient.  This function should only be used where you need to perform
// other operations against the standard scheme list.
COMPONENT_EXPORT(URL)
std::vector<std::string> GetStandardSchemes();

// Adds an application-defined scheme to the internal list of schemes allowed
// for referrers.
COMPONENT_EXPORT(URL)
void AddReferrerScheme(const char* new_scheme, SchemeType scheme_type);

// Adds an application-defined scheme to the list of schemes that do not trigger
// mixed content warnings.
COMPONENT_EXPORT(URL) void AddSecureScheme(const char* new_scheme);
COMPONENT_EXPORT(URL) const std::vector<std::string>& GetSecureSchemes();

// Adds an application-defined scheme to the list of schemes that normal pages
// cannot link to or access (i.e., with the same security rules as those applied
// to "file" URLs).
COMPONENT_EXPORT(URL) void AddLocalScheme(const char* new_scheme);
COMPONENT_EXPORT(URL) const std::vector<std::string>& GetLocalSchemes();

// Adds an application-defined scheme to the list of schemes that cause pages
// loaded with them to not have access to pages loaded with any other URL
// scheme.
COMPONENT_EXPORT(URL) void AddNoAccessScheme(const char* new_scheme);
COMPONENT_EXPORT(URL) const std::vector<std::string>& GetNoAccessSchemes();

// Adds an application-defined scheme to the list of schemes that can be sent
// CORS requests.
COMPONENT_EXPORT(URL) void AddCorsEnabledScheme(const char* new_scheme);
COMPONENT_EXPORT(URL) const std::vector<std::string>& GetCorsEnabledSchemes();

// Adds an application-defined scheme to the list of web schemes that can be
// used by web to store data (e.g. cookies, local storage, ...). This is
// to differentiate them from schemes that can store data but are not used on
// web (e.g. application's internal schemes) or schemes that are used on web but
// cannot store data.
COMPONENT_EXPORT(URL) void AddWebStorageScheme(const char* new_scheme);
COMPONENT_EXPORT(URL) const std::vector<std::string>& GetWebStorageSchemes();

// Adds an application-defined scheme to the list of schemes that can bypass the
// Content-Security-Policy (CSP) checks.
COMPONENT_EXPORT(URL) void AddCSPBypassingScheme(const char* new_scheme);
COMPONENT_EXPORT(URL) const std::vector<std::string>& GetCSPBypassingSchemes();

// Adds an application-defined scheme to the list of schemes that are strictly
// empty documents, allowing them to commit synchronously.
COMPONENT_EXPORT(URL) void AddEmptyDocumentScheme(const char* new_scheme);
COMPONENT_EXPORT(URL) const std::vector<std::string>& GetEmptyDocumentSchemes();

// Adds a scheme with a predefined default handler.
//
// This pair of strings must be normalized protocol handler parameters as
// described in the Custom Handler specification.
// https://html.spec.whatwg.org/multipage/system-state.html#normalize-protocol-handler-parameters
COMPONENT_EXPORT(URL)
void AddPredefinedHandlerScheme(const char* new_scheme, const char* handler);
COMPONENT_EXPORT(URL)
std::vector<std::pair<std::string, std::string>> GetPredefinedHandlerSchemes();

// Sets a flag to prevent future calls to Add*Scheme from succeeding.
//
// This is designed to help prevent errors for multithreaded applications.
// Normal usage would be to call Add*Scheme for your custom schemes at
// the beginning of program initialization, and then LockSchemeRegistries. This
// prevents future callers from mistakenly calling Add*Scheme when the
// program is running with multiple threads, where such usage would be
// dangerous.
//
// We could have had Add*Scheme use a lock instead, but that would add
// some platform-specific dependencies we don't otherwise have now, and is
// overkill considering the normal usage is so simple.
COMPONENT_EXPORT(URL) void LockSchemeRegistries();

// Locates the scheme in the given string and places it into |found_scheme|,
// which may be NULL to indicate the caller does not care about the range.
//
// Returns whether the given |compare| scheme matches the scheme found in the
// input (if any). The |compare| scheme must be a valid canonical scheme or
// the result of the comparison is undefined.
COMPONENT_EXPORT(URL)
bool FindAndCompareScheme(const char* str,
                          int str_len,
                          const char* compare,
                          Component* found_scheme);
COMPONENT_EXPORT(URL)
bool FindAndCompareScheme(const char16_t* str,
                          int str_len,
                          const char* compare,
                          Component* found_scheme);
inline bool FindAndCompareScheme(const std::string& str,
                                 const char* compare,
                                 Component* found_scheme) {
  return FindAndCompareScheme(str.data(), static_cast<int>(str.size()),
                              compare, found_scheme);
}
inline bool FindAndCompareScheme(const std::u16string& str,
                                 const char* compare,
                                 Component* found_scheme) {
  return FindAndCompareScheme(str.data(), static_cast<int>(str.size()),
                              compare, found_scheme);
}

// Returns true if the given scheme identified by |scheme| within |spec| is in
// the list of known standard-format schemes (see AddStandardScheme).
COMPONENT_EXPORT(URL)
bool IsStandard(const char* spec, const Component& scheme);
COMPONENT_EXPORT(URL)
bool IsStandard(const char16_t* spec, const Component& scheme);

// Returns true if the given scheme identified by |scheme| within |spec| is in
// the list of allowed schemes for referrers (see AddReferrerScheme).
COMPONENT_EXPORT(URL)
bool IsReferrerScheme(const char* spec, const Component& scheme);

// Returns true and sets |type| to the SchemeType of the given scheme
// identified by |scheme| within |spec| if the scheme is in the list of known
// standard-format schemes (see AddStandardScheme).
COMPONENT_EXPORT(URL)
bool GetStandardSchemeType(const char* spec,
                           const Component& scheme,
                           SchemeType* type);
COMPONENT_EXPORT(URL)
bool GetStandardSchemeType(const char16_t* spec,
                           const Component& scheme,
                           SchemeType* type);

// Hosts  ----------------------------------------------------------------------

// Returns true if the |canonical_host| matches or is in the same domain as the
// given |canonical_domain| string. For example, if the canonicalized hostname
// is "www.google.com", this will return true for "com", "google.com", and
// "www.google.com" domains.
//
// If either of the input StringPieces is empty, the return value is false. The
// input domain should match host canonicalization rules. i.e. it should be
// lowercase except for escape chars.
COMPONENT_EXPORT(URL)
bool DomainIs(std::string_view canonical_host,
              std::string_view canonical_domain);

// Returns true if the hostname is an IP address. Note: this function isn't very
// cheap, as it must re-parse the host to verify.
COMPONENT_EXPORT(URL) bool HostIsIPAddress(std::string_view host);

// URL library wrappers --------------------------------------------------------

// Parses the given spec according to the extracted scheme type. Normal users
// should use the URL object, although this may be useful if performance is
// critical and you don't want to do the heap allocation for the std::string.
//
// As with the Canonicalize* functions, the charset converter can
// be NULL to use UTF-8 (it will be faster in this case).
//
// Returns true if a valid URL was produced, false if not. On failure, the
// output and parsed structures will still be filled and will be consistent,
// but they will not represent a loadable URL.
COMPONENT_EXPORT(URL)
bool Canonicalize(const char* spec,
                  int spec_len,
                  bool trim_path_end,
                  CharsetConverter* charset_converter,
                  CanonOutput* output,
                  Parsed* output_parsed);
COMPONENT_EXPORT(URL)
bool Canonicalize(const char16_t* spec,
                  int spec_len,
                  bool trim_path_end,
                  CharsetConverter* charset_converter,
                  CanonOutput* output,
                  Parsed* output_parsed);

// Resolves a potentially relative URL relative to the given parsed base URL.
// The base MUST be valid. The resulting canonical URL and parsed information
// will be placed in to the given out variables.
//
// The relative need not be relative. If we discover that it's absolute, this
// will produce a canonical version of that URL. See Canonicalize() for more
// about the charset_converter.
//
// Returns true if the output is valid, false if the input could not produce
// a valid URL.
COMPONENT_EXPORT(URL)
bool ResolveRelative(const char* base_spec,
                     int base_spec_len,
                     const Parsed& base_parsed,
                     const char* relative,
                     int relative_length,
                     CharsetConverter* charset_converter,
                     CanonOutput* output,
                     Parsed* output_parsed);
COMPONENT_EXPORT(URL)
bool ResolveRelative(const char* base_spec,
                     int base_spec_len,
                     const Parsed& base_parsed,
                     const char16_t* relative,
                     int relative_length,
                     CharsetConverter* charset_converter,
                     CanonOutput* output,
                     Parsed* output_parsed);

// Replaces components in the given VALID input URL. The new canonical URL info
// is written to output and out_parsed.
//
// Returns true if the resulting URL is valid.
COMPONENT_EXPORT(URL)
bool ReplaceComponents(const char* spec,
                       int spec_len,
                       const Parsed& parsed,
                       const Replacements<char>& replacements,
                       CharsetConverter* charset_converter,
                       CanonOutput* output,
                       Parsed* out_parsed);
COMPONENT_EXPORT(URL)
bool ReplaceComponents(const char* spec,
                       int spec_len,
                       const Parsed& parsed,
                       const Replacements<char16_t>& replacements,
                       CharsetConverter* charset_converter,
                       CanonOutput* output,
                       Parsed* out_parsed);

// String helper functions -----------------------------------------------------

enum class DecodeURLMode {
  // UTF-8 decode only. Invalid byte sequences are replaced with U+FFFD.
  kUTF8,
  // Try UTF-8 decoding. If the input contains byte sequences invalid
  // for UTF-8, apply byte to Unicode mapping.
  kUTF8OrIsomorphic,
};

// Unescapes the given string using URL escaping rules.
COMPONENT_EXPORT(URL)
void DecodeURLEscapeSequences(std::string_view input,
                              DecodeURLMode mode,
                              CanonOutputW* output);

// Escapes the given string as defined by the JS method encodeURIComponent. See
// https://developer.mozilla.org/en/JavaScript/Reference/Global_Objects/encodeURIComponent
COMPONENT_EXPORT(URL)
void EncodeURIComponent(std::string_view input, CanonOutput* output);

// Returns true if `c` is a character that does not require escaping in
// encodeURIComponent.
// TODO(crbug.com/1481056): Remove this when event-level reportEvent is removed
// (if it is still this function's only consumer).
COMPONENT_EXPORT(URL)
bool IsURIComponentChar(char c);

// Checks an arbitrary string for invalid escape sequences.
//
// A valid percent-encoding is '%' followed by exactly two hex-digits. This
// function returns true if an occurrence of '%' is found and followed by
// anything other than two hex-digits.
COMPONENT_EXPORT(URL)
bool HasInvalidURLEscapeSequences(std::string_view input);

}  // namespace url

#endif  // URL_URL_UTIL_H_