File: url_parse_internal.h

package info (click to toggle)
chromium 139.0.7258.127-1
  • links: PTS, VCS
  • area: main
  • in suites:
  • size: 6,122,068 kB
  • sloc: cpp: 35,100,771; ansic: 7,163,530; javascript: 4,103,002; python: 1,436,920; asm: 946,517; xml: 746,709; pascal: 187,653; perl: 88,691; sh: 88,436; objc: 79,953; sql: 51,488; cs: 44,583; fortran: 24,137; makefile: 22,147; tcl: 15,277; php: 13,980; yacc: 8,984; ruby: 7,485; awk: 3,720; lisp: 3,096; lex: 1,327; ada: 727; jsp: 228; sed: 36
file content (155 lines) | stat: -rw-r--r-- 6,057 bytes parent folder | download | duplicates (6)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
// Copyright 2013 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifdef UNSAFE_BUFFERS_BUILD
// TODO(crbug.com/350788890): Remove this and spanify to fix the errors.
#pragma allow_unsafe_buffers
#endif

#ifndef URL_URL_PARSE_INTERNAL_H_
#define URL_URL_PARSE_INTERNAL_H_

// Contains common inline helper functions used by the URL parsing routines.

#include "url/third_party/mozilla/url_parse.h"

namespace url {

// Reports whether |ch| is a URL separator, i.e. either a forward slash '/'
// (U+002F) or a backslash (U+005C).
//
// The URL Standard is full of checks shaped like this one:
//
// > If url is special and c is U+002F (/) or U+005C (\), ...
//
// so the test is factored out here instead of being spelled out at each site.
inline bool IsSlashOrBackslash(char16_t ch) {
  switch (ch) {
    case u'/':
    case u'\\':
      return true;
    default:
      return false;
  }
}
// 8-bit overload: converts the character to char16_t and defers to the
// 16-bit overload so that both share one definition of "separator".
inline bool IsSlashOrBackslash(char ch) {
  const char16_t widened = static_cast<char16_t>(ch);
  return IsSlashOrBackslash(widened);
}

// Reports whether |ch| is a character that gets trimmed from the ends of a
// URL: the space character or anything with a smaller code unit value (the
// control characters).
inline bool ShouldTrimFromURL(char16_t ch) {
  constexpr char16_t kLastTrimmable = u' ';
  return !(ch > kLastTrimmable);
}
// 8-bit overload. The value is converted to char16_t before the comparison, so
// a byte with the high bit set becomes a large code unit (never <= ' ') whether
// or not plain char is signed on the target.
inline bool ShouldTrimFromURL(char ch) {
  const char16_t widened = static_cast<char16_t>(ch);
  return ShouldTrimFromURL(widened);
}

// Shrinks the range [|*begin|, |*len|) of |spec| so that it no longer starts
// with -- and, when |trim_path_end| is true, no longer ends with -- characters
// for which ShouldTrimFromURL() returns true. Both |*begin| and |*len| must
// already be initialized by the caller and are updated in place.
//
// Despite its name, |*len| is NOT the number of characters counted from
// |*begin|; it is an end position within |spec| (the string starts at
// character |*begin| and goes until |*len|).
template<typename CHAR>
inline void TrimURL(const CHAR* spec, int* begin, int* len,
                    bool trim_path_end = true) {
  int first = *begin;
  int end = *len;

  // Skip over leading whitespace and control characters.
  for (; first < end && ShouldTrimFromURL(spec[first]); ++first) {
  }

  if (trim_path_end) {
    // Drop trailing whitespace and control characters. The |end > first|
    // guard matters when the input consists solely of trimmable characters:
    // the backwards scan must stop at the start and not run past it.
    for (; end > first && ShouldTrimFromURL(spec[end - 1]); --end) {
    }
  }

  *begin = first;
  *len = end;
}

// Returns how many characters in a row, starting at |begin_offset| in |str|
// (whose length is |str_len|), are a slash or a backslash. The two kinds may be
// freely mixed within the run. Returns 0 if |begin_offset| is not before
// |str_len|.
//
// TODO(crbug.com/40063064): Rename this function to
// `CountConsecutiveSlashesOrBackslashes`.
template <typename CHAR>
inline int CountConsecutiveSlashes(const CHAR* str,
                                   int begin_offset,
                                   int str_len) {
  int cursor = begin_offset;
  for (; cursor < str_len; ++cursor) {
    if (!IsSlashOrBackslash(str[cursor])) {
      break;
    }
  }
  // |cursor| now sits on the first non-separator (or at the end of the string).
  return cursor - begin_offset;
}

// Reports whether |ch| is a forward slash ('/'). Unlike IsSlashOrBackslash(),
// a backslash does not count.
inline bool IsSlash(char16_t ch) {
  constexpr char16_t kSlash = u'/';
  return kSlash == ch;
}
// 8-bit overload: converts to char16_t and defers to the 16-bit overload.
inline bool IsSlash(char ch) {
  const char16_t widened = static_cast<char16_t>(ch);
  return IsSlash(widened);
}

// Returns how many forward slashes appear in a row starting at |begin_offset|
// in |str|, whose length is |str_len|. A backslash ends the run. Returns 0 if
// |begin_offset| is not before |str_len|.
//
// TODO(crbug.com/40063064): Rename this function to
// `CountConsecutiveSlashes` after the current `CountConsecutiveSlashes` is
// renamed to `CountConsecutiveSlashesOrBackslashes`.
template <typename CHAR>
inline int CountConsecutiveSlashesButNotCountBackslashes(const CHAR* str,
                                                         int begin_offset,
                                                         int str_len) {
  int pos = begin_offset;
  while (pos < str_len && IsSlash(str[pos])) {
    ++pos;
  }
  // The run length is the distance travelled from the starting offset.
  return pos - begin_offset;
}

// Internal functions in url_parse.cc that parse the path, that is, everything
// following the authority section. The input |path| is the range of everything
// following the authority section, and the outputs |filepath|, |query| and
// |ref| receive the identified ranges.
//
// This is designed for the file URL parser or other consumers who may do
// special stuff at the beginning, but want regular path parsing; it just
// maps to the internal parsing function for paths.
//
// NOTE(review): |spec| is presumably the string that all of these ranges index
// into -- confirm against the definition in url_parse.cc.
//
// Overloads are provided for 8-bit (char) and 16-bit (char16_t) specs.
void ParsePathInternal(const char* spec,
                       const Component& path,
                       Component* filepath,
                       Component* query,
                       Component* ref);
void ParsePathInternal(const char16_t* spec,
                       const Component& path,
                       Component* filepath,
                       Component* query,
                       Component* ref);

// Internal functions in url_parse.cc that parse non-special URLs. They are
// similar to the `ParseNonSpecialURL` functions in url_parse.h, but take an
// additional `trim_path_end` parameter that controls whether to trim the path
// end or not. The result is returned as a `Parsed` by value.
//
// Overloads are provided for 8-bit and 16-bit string views.
Parsed ParseNonSpecialURLInternal(std::string_view url, bool trim_path_end);
Parsed ParseNonSpecialURLInternal(std::u16string_view url, bool trim_path_end);

// Given a spec and the position of the character after the colon following
// the special scheme, this parses it and fills in the structure. Every item in
// the parsed structure is filled EXCEPT for the scheme, which is untouched.
//
// NOTE(review): |after_scheme| is an int, so it is presumably an offset into
// |spec| rather than a pointer, and |spec_len| is presumably the length of
// |spec| in characters -- confirm against the definition.
//
// Overloads are provided for 8-bit (char) and 16-bit (char16_t) specs.
void ParseAfterSpecialScheme(const char* spec,
                             int spec_len,
                             int after_scheme,
                             Parsed* parsed);
void ParseAfterSpecialScheme(const char16_t* spec,
                             int spec_len,
                             int after_scheme,
                             Parsed* parsed);

// Given a spec and the position of the character after the colon following
// the non-special scheme, this parses it and fills in the structure. Every
// item in the parsed structure is filled EXCEPT for the scheme, which is
// untouched.
//
// NOTE(review): |after_scheme| is an int, so it is presumably an offset into
// |spec| rather than a pointer, and |spec_len| is presumably the length of
// |spec| in characters -- confirm against the definition.
//
// Overloads are provided for 8-bit (char) and 16-bit (char16_t) specs.
void ParseAfterNonSpecialScheme(const char* spec,
                                int spec_len,
                                int after_scheme,
                                Parsed* parsed);
void ParseAfterNonSpecialScheme(const char16_t* spec,
                                int spec_len,
                                int after_scheme,
                                Parsed* parsed);

}  // namespace url

#endif  // URL_URL_PARSE_INTERNAL_H_