File: http_util.h

package info (click to toggle)
chromium 139.0.7258.127-1
  • links: PTS, VCS
  • area: main
  • in suites:
  • size: 6,122,068 kB
  • sloc: cpp: 35,100,771; ansic: 7,163,530; javascript: 4,103,002; python: 1,436,920; asm: 946,517; xml: 746,709; pascal: 187,653; perl: 88,691; sh: 88,436; objc: 79,953; sql: 51,488; cs: 44,583; fortran: 24,137; makefile: 22,147; tcl: 15,277; php: 13,980; yacc: 8,984; ruby: 7,485; awk: 3,720; lisp: 3,096; lex: 1,327; ada: 727; jsp: 228; sed: 36
file content (487 lines) | stat: -rw-r--r-- 21,975 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
// Copyright 2012 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef NET_HTTP_HTTP_UTIL_H_
#define NET_HTTP_HTTP_UTIL_H_

#include <stddef.h>
#include <stdint.h>

#include <optional>
#include <set>
#include <string>
#include <string_view>
#include <vector>

#include "base/compiler_specific.h"
#include "base/containers/span.h"
#include "base/strings/string_tokenizer.h"
#include "base/time/time.h"
#include "net/base/net_export.h"
#include "net/http/http_byte_range.h"
#include "net/http/http_version.h"
#include "url/gurl.h"
#include "url/origin.h"

// This is a macro to support extending this string literal at compile time.
// Please excuse me polluting your global namespace!
#define HTTP_LWS " \t"

namespace net {

class HttpResponseHeaders;

class NET_EXPORT HttpUtil {
 public:
  // Generates a request line that is used for text-based HTTP messaging.
  static std::string GenerateRequestLine(std::string_view method,
                                         const GURL& url,
                                         bool is_for_get_to_http_proxy);

  // Returns the absolute URL, to be used for the http request. This url is
  // made up of the protocol, host, [port], path, [query]. Everything else
  // is stripped (username, password, reference).
  static std::string SpecForRequest(const GURL& url);

  // Parses the value of a Content-Type header.  |mime_type|, |charset|, and
  // |had_charset| output parameters must be valid pointers.  |boundary| may be
  // nullptr.  |*mime_type| and |*charset| should be empty and |*had_charset|
  // false when called with the first Content-Type header value in a given
  // header list.
  //
  // ParseContentType() supports parsing multiple Content-Type headers in the
  // same header list.  For this operation, subsequent calls should pass in the
  // same |mime_type|, |charset|, and |had_charset| arguments without clearing
  // them.
  //
  // The resulting mime_type and charset values are normalized to lowercase.
  // The mime_type and charset output values are only modified if the
  // content_type_str contains a mime type and charset value, respectively.  If
  // |boundary| is not null, then |*boundary| will be assigned the (unquoted)
  // value of the boundary parameter, if any.
  static void ParseContentType(std::string_view content_type_str,
                               std::string* mime_type,
                               std::string* charset,
                               bool* had_charset,
                               std::string* boundary);

  // Parses the value of a "Range" header as defined in RFC 7233 Section 2.1.
  // https://tools.ietf.org/html/rfc7233#section-2.1
  // Returns false on failure.
  static bool ParseRangeHeader(const std::string& range_specifier,
                               std::vector<HttpByteRange>* ranges);

  // Extracts the values in a Content-Range header and returns true if all three
  // values are present and valid for a 206 response; otherwise returns false.
  // The following values will be outputted:
  // |*first_byte_position| = inclusive position of the first byte of the range
  // |*last_byte_position| = inclusive position of the last byte of the range
  // |*instance_length| = size in bytes of the object requested
  // If this method returns false, then all of the outputs will be -1.
  static bool ParseContentRangeHeaderFor206(std::string_view content_range_spec,
                                            int64_t* first_byte_position,
                                            int64_t* last_byte_position,
                                            int64_t* instance_length);

  // Parses a Retry-After header that is either an absolute date/time or a
  // number of seconds in the future. Interprets absolute times as relative to
  // |now|. If |retry_after_string| is successfully parsed and indicates a time
  // that is not in the past, fills in |*retry_after| and returns true;
  // otherwise, returns false.
  static bool ParseRetryAfterHeader(const std::string& retry_after_string,
                                    base::Time now,
                                    base::TimeDelta* retry_after);

  // Formats a time in the IMF-fixdate format defined by RFC 7231 (satisfying
  // its HTTP-date format).
  //
  // This behaves identically to the function in base/i18n/time_formatting.h. It
  // is reimplemented here since net/ cannot depend on base/i18n/.
  static std::string TimeFormatHTTP(base::Time time);

  // Returns true if the request method is "safe" (per section 4.2.1 of
  // RFC 7231).
  static bool IsMethodSafe(std::string_view method);

  // Returns true if the request method is idempotent (per section 4.2.2 of
  // RFC 7231).
  static bool IsMethodIdempotent(std::string_view method);

  // Returns true if it is safe to allow users and scripts to specify a header
  // with a given |name| and |value|.
  // See https://fetch.spec.whatwg.org/#forbidden-request-header.
  // Does not check header validity.
  static bool IsSafeHeader(std::string_view name, std::string_view value);

  // Returns true if |name| is a valid HTTP header name.
  static bool IsValidHeaderName(std::string_view name);

  // Returns false if |value| contains NUL or CRLF. This method does not perform
  // a fully RFC-2616-compliant header value validation.
  static bool IsValidHeaderValue(std::string_view value);

  // Multiple occurrences of some headers cannot be coalesced into a comma-
  // separated list since their values are (or contain) unquoted HTTP-date
  // values, which may contain a comma (see RFC 2616 section 3.3.1).
  static bool IsNonCoalescingHeader(std::string_view name);

  // Reports whether `c` is HTTP "linear white space", i.e. a space (SP) or a
  // horizontal tab (HT). The accepted set mirrors the HTTP_LWS macro, so
  // newlines are not matched.
  //
  // Marked ALWAYS_INLINE to force inlining even when compiled with -Oz in
  // Clang.
  ALWAYS_INLINE static bool IsLWS(char c) {
    // The switch below is a hand-expanded form of "is `c` one of the
    // characters in HTTP_LWS". Clang performs that expansion on its own at
    // -O3, but Android is compiled at -Oz, so it is spelled out here. The
    // static_assert breaks the build if HTTP_LWS ever stops matching the
    // expanded form.
    static_assert(std::string_view(HTTP_LWS) == " \t");
    switch (c) {
      case ' ':
      case '\t':
        return true;
      default:
        return false;
    }
  }

  // Trim HTTP_LWS chars from the beginning and end of the string.
  static void TrimLWS(std::string::const_iterator* begin,
                      std::string::const_iterator* end);
  static std::string_view TrimLWS(std::string_view string);
  // This operates on the substring of `string` between `begin_offset` and
  // `end_offset`, for consumers that need to know offsets relative to the
  // original string.
  static void TrimLWS(std::string_view string,
                      size_t& begin_offset,
                      size_t& end_offset);

  // Whether the character is a valid |tchar| as defined in RFC 7230 Sec 3.2.6.
  static bool IsTokenChar(char c);
  // Whether the string is a valid |token| as defined in RFC 7230 Sec 3.2.6.
  static bool IsToken(std::string_view str);

  // Reports whether `c` is a control character (CTL) as defined in RFC 5234
  // Appendix B.1, i.e. a byte in the range 0x00-0x1F or the byte 0x7F.
  static inline bool IsControlChar(char c) {
    // Compare as an unsigned byte so the range check reads the same whether
    // plain `char` is signed or unsigned; bytes >= 0x80 never match.
    const unsigned char byte = static_cast<unsigned char>(c);
    if (byte == 0x7F) {
      return true;
    }
    return byte <= 0x1F;
  }

  // Whether the string is a valid |parmname| as defined in RFC 5987 Sec 3.2.1.
  static bool IsParmName(std::string_view str);

  // RFC 2616 Sec 2.2:
  // quoted-string = ( <"> *(qdtext | quoted-pair ) <"> )
  // Unquote() strips the surrounding quotemarks off a string, and unescapes
  // any quoted-pair to obtain the value contained by the quoted-string.
  // If the input is not quoted, then it works like the identity function.
  static std::string Unquote(std::string_view str);

  // Similar to Unquote(), but additionally validates that the string being
  // unescaped actually is a valid quoted string. Returns false for an empty
  // string, a string without quotes, a string with mismatched quotes, and
  // a string with unescaped embedded quotes.
  [[nodiscard]] static bool StrictUnquote(std::string_view str,
                                          std::string* out);

  // The reverse of Unquote() -- escapes and surrounds with "
  static std::string Quote(std::string_view str);

  // Returns the start of the status line, or std::string::npos if no status
  // line was found. This allows for 4 bytes of junk to precede the status line
  // (which is what Mozilla does too).
  static size_t LocateStartOfStatusLine(base::span<const uint8_t> buf);

  // Returns index beyond the end-of-headers marker or std::string::npos if not
  // found.  RFC 2616 defines the end-of-headers marker as a double CRLF;
  // however, some servers only send back LFs (e.g., Unix-based CGI scripts
  // written using the ASIS Apache module).  This function therefore accepts the
  // pattern LF[CR]LF as end-of-headers (just like Mozilla). The first line of
  // |buf| is considered the status line, even if empty. The parameter |i| is
  // the offset within |buf| to begin searching from.
  static size_t LocateEndOfHeaders(base::span<const uint8_t> buf, size_t i = 0);

  // Same as |LocateEndOfHeaders|, but does not expect a status line, so can be
  // used on multi-part responses or HTTP/1.x trailers.  As a result, if |buf|
  // starts with a single [CR]LF,  it is considered an empty header list, as
  // opposed to an empty status line above a header list.
  static size_t LocateEndOfAdditionalHeaders(base::span<const uint8_t> buf,
                                             size_t i = 0);

  // Assemble "raw headers" in the format required by HttpResponseHeaders.
  // This involves normalizing line terminators, converting [CR]LF to \0 and
  // handling HTTP line continuations (i.e., lines starting with LWS are
  // continuations of the previous line). |buf| should end at the
  // end-of-headers marker as defined by LocateEndOfHeaders. If a \0 appears
  // within the headers themselves, it will be stripped. This is a workaround to
  // avoid later code from incorrectly interpreting it as a line terminator.
  //
  // TODO(crbug.com/40496844): Should remove or internalize this to
  //                         HttpResponseHeaders.
  static std::string AssembleRawHeaders(std::string_view buf);

  // Converts assembled "raw headers" back to the HTTP response format. That is,
  // converts each \0 occurrence to CRLF. This is used by DevTools.
  // Since all line continuations info is already lost at this point, the result
  // consists of status line and then one line for each header.
  static std::string ConvertHeadersBackToHTTPResponse(const std::string& str);

  // Given a comma separated ordered list of language codes, return an expanded
  // list by adding the base language from language-region pair if it doesn't
  // already exist. This increases the chances of language matching in many
  // cases as explained at this w3c doc:
  // https://www.w3.org/International/questions/qa-lang-priorities#langtagdetail
  // Note that we do not support Q values (e.g. ;q=0.9) in |language_prefs|.
  static std::string ExpandLanguageList(const std::string& language_prefs);

  // Given a comma separated ordered list of language codes, return
  // the list with a qvalue appended to each language.
  // The way qvalues are assigned is rather simple. The qvalue
  // starts with 1.0 and is decremented by 0.1 for each successive entry
  // in the list until it reaches 0.1. All the entries after that are
  // assigned the same qvalue of 0.1. Also, note that the 1st language
  // will not have a qvalue added because the absence of a qvalue implicitly
  // means q=1.0.
  //
  // When making a http request, this should be used to determine what
  // to put in Accept-Language header. If a comma separated list of language
  // codes *without* qvalue is sent, web servers regard all
  // of them as having q=1.0 and pick one of them even though it may not
  // be at the beginning of the list (see http://crbug.com/5899).
  static std::string GenerateAcceptLanguageHeader(
      const std::string& raw_language_list);

  // Returns true if the parameters describe a response with a strong etag or
  // last-modified header.  See section 13.3.3 of RFC 2616.
  //
  // Non-nullopt times will be converted to std::strings and parsed, which can
  // be somewhat expensive.
  //
  // Note that HasStrongValidators() being true for a set of headers implies
  // HasValidators() is also true.
  static bool HasStrongValidators(
      HttpVersion version,
      std::optional<std::string_view> etag_header,
      std::optional<std::string_view> last_modified_header,
      std::optional<std::string_view> date_header);

  // Returns true if this response has any validator (either a Last-Modified or
  // an ETag) regardless of whether it is strong or weak.  See section 13.3.3 of
  // RFC 2616.
  //
  // Non-nullopt times will be converted to std::strings and parsed, which can
  // be somewhat expensive.
  static bool HasValidators(
      HttpVersion version,
      std::optional<std::string_view> etag_header,
      std::optional<std::string_view> last_modified_header);

  // Gets a vector of common HTTP status codes for histograms of status
  // codes.  Currently returns everything in the range [100, 600), plus 0
  // (for invalid responses/status codes).
  static std::vector<int> GetStatusCodesForHistogram();

  // Maps an HTTP status code to one of the status codes in the vector
  // returned by GetStatusCodesForHistogram.
  static int MapStatusCodeForHistogram(int code);

  // Returns true if |accept_encoding| is well-formed.  Parsed encodings,
  // converted to lower case, are placed in the provided string-set. The
  // resulting set is augmented to fulfill the RFC 2616 and RFC 7231
  // recommendations, e.g. if no encodings are specified, then {"*"} is
  // returned to denote that the client has no encoding preferences (but it
  // does not imply that the user agent will be able to correctly process all
  // encodings).
  static bool ParseAcceptEncoding(const std::string& accept_encoding,
                                  std::set<std::string>* allowed_encodings);

  // Returns true if |content_encoding| is well-formed.  Parsed encodings,
  // converted to lower case, are placed in the provided string-set. See
  // sections 14.11 and 3.5 of RFC 2616.
  static bool ParseContentEncoding(const std::string& content_encoding,
                                   std::set<std::string>* used_encodings);

  // Return true if `headers` contain multiple `field_name` fields with
  // different values.
  static bool HeadersContainMultipleCopiesOfField(
      const HttpResponseHeaders& headers,
      const std::string& field_name);

  // Used to iterate over the name/value pairs of HTTP headers.  To iterate
  // over the values in a multi-value header, use ValuesIterator.
  // See AssembleRawHeaders for joining line continuations (this iterator
  // does not expect any).
  class NET_EXPORT HeadersIterator {
   public:
    // The data `headers` points to must outlive `this`. GetNext() must be
    // called before any other method.
    HeadersIterator(std::string_view headers,
                    const std::string& line_delimiter);
    ~HeadersIterator();

    // Advances the iterator to the next header, if any.  Returns true if there
    // is a next header.  Use name* and values* methods to access the resultant
    // header name and values.
    bool GetNext();

    void Reset() { lines_.Reset(); }

    size_t name_begin() const { return name_begin_; }
    size_t name_end() const { return name_end_; }
    std::string name() const { return std::string(name_piece()); }
    std::string_view name_piece() const {
      return headers_.substr(name_begin_, name_end_ - name_begin_);
    }

    size_t values_begin() const { return values_begin_; }
    size_t values_end() const { return values_end_; }
    std::string values() const { return std::string(values_piece()); }
    std::string_view values_piece() const {
      return headers_.substr(values_begin_, values_end_ - values_begin_);
    }

   private:
    // The full set of input headers.
    const std::string_view headers_;

    // Tokenizer over `headers_`.
    base::StringViewTokenizer lines_;

    // Start/end of the corresponding fields, relative to the start of
    // `headers_`.
    size_t name_begin_ = 0;
    size_t name_end_ = 0;
    size_t values_begin_ = 0;
    size_t values_end_ = 0;
  };

  // Walks the delimited values of an HTTP header. HTTP LWS is trimmed from
  // each resulting value automatically.
  //
  // Be aware, when iterating over response header values, that some headers
  // (e.g., Last-Modified) do not use commas as delimiters. Avoid this iterator
  // for such headers, which are considered non-coalescing (see
  // IsNonCoalescingHeader).
  //
  // Delimiters that appear inside an HTTP quoted string are skipped over
  // rather than treated as separators.
  class NET_EXPORT ValuesIterator {
   public:
    ValuesIterator(std::string_view values,
                   char delimiter,
                   bool ignore_empty_values = true);

    ValuesIterator(const ValuesIterator& other);
    ~ValuesIterator();

    // Moves to the next value, if there is one, and returns whether one was
    // found. On success the value* accessors describe that value.
    bool GetNext();

    // The current value, as a view into the original input.
    std::string_view value() const {
      const size_t length = value_end_ - value_begin_;
      return values_.substr(value_begin_, length);
    }

    // Begin/end offsets of the current value, measured from the start of
    // `values`.
    size_t value_begin() const { return value_begin_; }
    size_t value_end() const { return value_end_; }

   private:
    // The original input value.
    std::string_view values_;

    bool ignore_empty_values_;

    base::StringViewTokenizer tokenizer_;

    // The range of the current value within `values_`. Tracked so that
    // begin/end indices can be provided to HttpResponseHeaders, the only
    // consumer that needs them.
    size_t value_begin_ = 0u;
    size_t value_end_ = 0u;
  };

  // Walks a delimited sequence of name-value pairs in an HTTP header. A pair
  // is a token (the name), an equals sign, and then a value that is either a
  // token or a quoted-string. Arbitrary HTTP LWS is permitted outside of and
  // between names, values, and delimiters.
  //
  // String views returned from this class' methods may be invalidated by a
  // call to GetNext() or once the NameValuePairsIterator is destroyed.
  class NET_EXPORT NameValuePairsIterator {
   public:
    // Controls whether a name may appear without a value.
    // Values::NOT_REQUIRED allows e.g. name1=value1;name2;name3=value3,
    // whereas Values::REQUIRED treats that input as a parse error because
    // name2 does not have a corresponding equals sign.
    enum class Values { NOT_REQUIRED, REQUIRED };

    // Controls whether unmatched quotes count as a failure. By default this
    // class is lenient and makes a best effort to parse values with
    // mismatched quotes. With STRICT_QUOTES, a value with mismatched or
    // otherwise invalid quotes is considered a parse error.
    enum class Quotes { STRICT_QUOTES, NOT_STRICT };

    NameValuePairsIterator(std::string_view value,
                           char delimiter,
                           Values optional_values = Values::REQUIRED,
                           Quotes strict_quotes = Quotes::NOT_STRICT);

    NameValuePairsIterator(const NameValuePairsIterator& other);

    ~NameValuePairsIterator();

    // Moves to the next pair, if there is one, and returns whether one was
    // found. A false return means either completion or a parse error; in the
    // error case `valid()` returns false. Once GetNext() has returned false,
    // for either reason, it should not be called again. On success, name()
    // and value() describe the pair.
    //
    // Every call invalidates the string views obtained after the previous
    // GetNext() call, as they may point to temporary buffers.
    bool GetNext();

    // False once a parse error has been encountered.
    bool valid() const { return valid_; }

    // The name of the current name-value pair.
    std::string_view name() const { return name_; }

    // The value of the current name-value pair: the unquoted form when the
    // value was quoted, otherwise the raw value. The returned string_view is
    // invalidated by the next GetNext() call.
    std::string_view value() const LIFETIME_BOUND {
      if (value_is_quoted_) {
        return unquoted_value_;
      }
      return value_;
    }

    bool value_is_quoted() const { return value_is_quoted_; }

    // The value as it appeared before any unquoting.
    std::string_view raw_value() const LIFETIME_BOUND { return value_; }

   private:
    // Attempts to parse `name_value_pair`, populating `name_`, `value_`, and
    // `unquoted_value_`. Returns false on failure. On failure, the caller
    // should clear those values, to ensure consistent behavior.
    bool ParseNameValuePair(std::string_view name_value_pair);

    HttpUtil::ValuesIterator props_;
    bool valid_ = true;

    std::string_view name_;
    std::string_view value_;

    // Do not store iterators into this string. The NameValuePairsIterator
    // is copyable/assignable, and after a copy such iterators would still
    // point into the original's unquoted_value_ member.
    std::string unquoted_value_;

    bool value_is_quoted_ = false;

    // True if a name is permitted to appear without a corresponding value;
    // false if a value is required for each name/value pair.
    // NOTE(review): presumably true for Values::NOT_REQUIRED; the constructor
    // is defined out of line, so confirm against its initializer.
    bool values_optional_;

    // True if quoted values are required to be properly quoted; false if
    // mismatched quotes and other problems with quoted values should be more
    // or less gracefully treated as valid.
    bool strict_quotes_;
  };
};

}  // namespace net

#endif  // NET_HTTP_HTTP_UTIL_H_