1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342
|
// Copyright 2016 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "third_party/blink/renderer/core/fetch/multipart_parser.h"
#include <algorithm>
#include <utility>
#include "base/containers/span.h"
#include "third_party/blink/public/platform/platform.h"
#include "third_party/blink/renderer/platform/network/http_names.h"
#include "third_party/blink/renderer/platform/network/http_parsers.h"
#include "third_party/blink/renderer/platform/wtf/std_lib_extras.h"
namespace blink {
namespace {

// The line break that ends a delimiter line (after any transport padding).
constexpr char kDelimiterSuffix[] = "\r\n";

// What follows the last delimiter: "--" and then the terminating line break.
constexpr char kCloseDelimiterSuffix[] = "--\r\n";

// The length of "\r\n". The parser's first delimiter matcher is created with
// this many bytes counted as already matched.
constexpr size_t kDashBoundaryOffset = 2u;

}  // namespace
// Default-constructs a matcher; the initial member values come from the class
// declaration, which is not part of this file.
MultipartParser::Matcher::Matcher() = default;
// Creates a matcher for `match_data` whose first `num_matched_bytes` bytes are
// counted as already matched.
// NOTE(review): `match_data_` is initialized from a span, so it presumably
// does not own the bytes and the caller must keep them alive - confirm against
// the header.
MultipartParser::Matcher::Matcher(base::span<const char> match_data,
size_t num_matched_bytes)
: match_data_(match_data), num_matched_bytes_(num_matched_bytes) {}
// Feeds the bytes of `data` to the single-byte Match() overload in order,
// stopping at the first byte that overload rejects.
//
// Returns true only if every byte of `data` was accepted (which is trivially
// the case for an empty span). Nothing is undone here when a byte is rejected;
// callers that want to restart use SetNumMatchedBytes().
bool MultipartParser::Matcher::Match(base::span<const char> data) {
  auto next = data.begin();
  while (next != data.end() && Match(*next)) {
    ++next;
  }
  return next == data.end();
}
// Overwrites the count of already matched bytes. In this file it is used to
// restart matching from the beginning by passing 0.
// `num_matched_bytes` must not exceed the size of the match data (DCHECKed).
void MultipartParser::Matcher::SetNumMatchedBytes(size_t num_matched_bytes) {
DCHECK_LE(num_matched_bytes, match_data_.size());
num_matched_bytes_ = num_matched_bytes;
}
// Creates a parser for a multipart body that uses `boundary`, reporting parse
// events to `client`. Parsing starts in the preamble state.
MultipartParser::MultipartParser(Vector<char> boundary, Client* client)
: client_(client),
delimiter_(std::move(boundary)),
state_(State::kParsingPreamble) {
// The delimiter consists of "\r\n" and a dash boundary which consists of
// "--" and a boundary.
delimiter_.push_front("\r\n--", 4u);
// Start with the leading "\r\n" of the delimiter counted as already matched,
// so that the first bytes of the body are compared against the dash boundary.
matcher_ = DelimiterMatcher(kDashBoundaryOffset);
}
// Feeds the next chunk of the multipart body to the parser.
//
// The chunk is consumed by a state machine that walks through the preamble,
// delimiters and their suffixes, part header fields, part octets, the close
// delimiter and the epilogue. Parse events are reported to `client_` as they
// are recognized. Client callbacks may change `state_` (see the kCancelled and
// kFinished cases), which is why the state is re-checked after callbacks that
// are followed by more work.
//
// Must not be called once the parser is finished or cancelled (DCHECKed).
//
// Returns false if
//   - a delimiter suffix or close delimiter suffix byte does not match,
//   - the state became kCancelled or kFinished while bytes were still left, or
//   - the header field parser rejected the input without consuming any of it.
// Returns true otherwise, including when the remaining bytes belong to the
// epilogue and are ignored.
bool MultipartParser::AppendData(base::span<const char> bytes) {
  DCHECK_NE(State::kFinished, state_);
  DCHECK_NE(State::kCancelled, state_);

  while (!bytes.empty()) {
    switch (state_) {
      case State::kParsingPreamble:
        // Parse either a preamble and a delimiter or a dash boundary.
        ParseDelimiter(bytes);
        if (!matcher_.IsMatchComplete() && !bytes.empty()) {
          // Parse a preamble data (by ignoring it) and then a delimiter.
          matcher_.SetNumMatchedBytes(0u);
          ParseDataAndDelimiter(bytes);
        }
        if (matcher_.IsMatchComplete()) {
          // Prepare for a delimiter suffix.
          matcher_ = DelimiterSuffixMatcher();
          state_ = State::kParsingDelimiterSuffix;
        }
        break;

      case State::kParsingDelimiterSuffix:
        // Parse transport padding and "\r\n" after a delimiter.
        // This state can be reached after either a preamble or part
        // octets are parsed.
        if (matcher_.NumMatchedBytes() == 0u) {
          ParseTransportPadding(bytes);
        }
        while (!bytes.empty()) {
          if (!matcher_.Match(bytes.front())) {
            return false;
          }
          bytes = bytes.subspan(1u);
          if (matcher_.IsMatchComplete()) {
            // Prepare for part header fields.
            state_ = State::kParsingPartHeaderFields;
            break;
          }
        }
        break;

      case State::kParsingPartHeaderFields: {
        // Parse part header fields (which ends with "\r\n") and an empty
        // line (which also ends with "\r\n").
        // This state can be reached after a delimiter and a delimiter
        // suffix after either a preamble or part octets are parsed.
        HTTPHeaderMap header_fields;
        if (ParseHeaderFields(bytes, &header_fields)) {
          // Prepare for part octets.
          matcher_ = DelimiterMatcher();
          state_ = State::kParsingPartOctets;
          client_->PartHeaderFieldsInMultipartReceived(header_fields);
        } else if (!bytes.empty()) {
          // When the header block is merely incomplete, ParseHeaderFields()
          // buffers the input and leaves `bytes` empty. If it fails and
          // leaves input behind, it refused the chunk outright (it does so
          // for chunks whose size does not fit in wtf_size_t). Neither the
          // state nor `bytes` changed, so looping again could never make
          // progress; report a parse failure instead of spinning forever.
          return false;
        }
        break;
      }

      case State::kParsingPartOctets: {
        // Parse part octets and a delimiter.
        // This state can be reached only after part header fields are
        // parsed.
        const size_t num_initially_matched_bytes = matcher_.NumMatchedBytes();
        auto bytes_before = bytes;
        ParseDelimiter(bytes);
        if (!matcher_.IsMatchComplete() && !bytes.empty()) {
          if (matcher_.NumMatchedBytes() >= num_initially_matched_bytes &&
              num_initially_matched_bytes > 0u) {
            // Since the matched bytes did not form a complete
            // delimiter, the matched bytes turned out to be octet
            // bytes instead of being delimiter bytes. Additionally,
            // some of the matched bytes are from the previous call and
            // are therefore not in the `bytes_before` span.
            client_->PartDataInMultipartReceived(matcher_.MatchedData());
            if (state_ != State::kParsingPartOctets)
              break;
            bytes_before = bytes;
          }
          matcher_.SetNumMatchedBytes(0u);
          ParseDataAndDelimiter(bytes);

          // Everything skipped since `bytes_before`, except the bytes the
          // matcher currently holds as a (partial or complete) delimiter, is
          // part data.
          const size_t skipped_size = bytes_before.size() - bytes.size();
          if (skipped_size > matcher_.NumMatchedBytes()) {
            size_t payload_size = skipped_size - matcher_.NumMatchedBytes();
            auto payload = bytes_before.first(payload_size);
            client_->PartDataInMultipartReceived(payload);
            if (state_ != State::kParsingPartOctets)
              break;
          }
        }
        if (matcher_.IsMatchComplete()) {
          state_ = State::kParsingDelimiterOrCloseDelimiterSuffix;
          client_->PartDataInMultipartFullyReceived();
        }
        break;
      }

      case State::kParsingDelimiterOrCloseDelimiterSuffix:
        // Determine whether this is a delimiter suffix or a close
        // delimiter suffix.
        // This state can be reached only after part octets are parsed.
        // The byte is only peeked at here; it is consumed by the state
        // selected below.
        if (bytes.front() == '-') {
          // Prepare for a close delimiter suffix.
          matcher_ = CloseDelimiterSuffixMatcher();
          state_ = State::kParsingCloseDelimiterSuffix;
        } else {
          // Prepare for a delimiter suffix.
          matcher_ = DelimiterSuffixMatcher();
          state_ = State::kParsingDelimiterSuffix;
        }
        break;

      case State::kParsingCloseDelimiterSuffix:
        // Parse "--", transport padding and "\r\n" after a delimiter
        // (a delimiter and "--" constitute a close delimiter).
        // This state can be reached only after part octets are parsed.
        for (;;) {
          // Transport padding is only skipped right after the "--".
          if (matcher_.NumMatchedBytes() == 2u) {
            ParseTransportPadding(bytes);
          }
          if (bytes.empty()) {
            break;
          }
          if (!matcher_.Match(bytes.front())) {
            return false;
          }
          bytes = bytes.subspan(1u);
          if (matcher_.IsMatchComplete()) {
            // Prepare for an epilogue.
            state_ = State::kParsingEpilogue;
            break;
          }
        }
        break;

      case State::kParsingEpilogue:
        // Parse an epilogue (by ignoring it).
        // This state can be reached only after a delimiter and a close
        // delimiter suffix after part octets are parsed.
        return true;

      case State::kCancelled:
      case State::kFinished:
        // The client changed the state.
        return false;
    }
  }

  DCHECK(bytes.empty());
  return true;
}
// Marks the parser as cancelled. AppendData() returns false as soon as it
// observes this state while it still has bytes to process.
void MultipartParser::Cancel() {
state_ = State::kCancelled;
}
bool MultipartParser::Finish() {
DCHECK_NE(State::kCancelled, state_);
DCHECK_NE(State::kFinished, state_);
const State initial_state = state_;
state_ = State::kFinished;
switch (initial_state) {
case State::kParsingPartOctets:
if (matcher_.NumMatchedBytes() > 0u) {
// Since the matched bytes did not form a complete delimiter,
// the matched bytes turned out to be octet bytes instead of being
// delimiter bytes.
client_->PartDataInMultipartReceived(matcher_.MatchedData());
}
return false;
case State::kParsingCloseDelimiterSuffix:
// Require a full close delimiter consisting of a delimiter and "--"
// but ignore missing or partial "\r\n" after that.
return matcher_.NumMatchedBytes() >= 2u;
case State::kParsingEpilogue:
return true;
default:
return false;
}
}
// Returns a fresh matcher (no bytes matched yet) for the close delimiter
// suffix, kCloseDelimiterSuffix.
MultipartParser::Matcher MultipartParser::CloseDelimiterSuffixMatcher() const {
return Matcher(base::span_from_cstring(kCloseDelimiterSuffix), 0u);
}
// Returns a matcher for `delimiter_` with the first
// `num_already_matched_bytes` bytes counted as already matched.
// NOTE(review): this file also calls DelimiterMatcher() without an argument,
// so the declaration presumably supplies a default value - confirm in the
// header.
MultipartParser::Matcher MultipartParser::DelimiterMatcher(
size_t num_already_matched_bytes) const {
return Matcher(delimiter_, num_already_matched_bytes);
}
// Returns a fresh matcher (no bytes matched yet) for the delimiter suffix,
// kDelimiterSuffix.
MultipartParser::Matcher MultipartParser::DelimiterSuffixMatcher() const {
return Matcher(base::span_from_cstring(kDelimiterSuffix), 0u);
}
// Scans `bytes` for the delimiter, starting from a matcher that has matched
// nothing yet (DCHECKed).
//
// On return one of the following holds:
//   - the matcher is complete and `bytes` starts right after the delimiter, or
//   - `bytes` is empty and the matcher records how many trailing bytes of the
//     input form a prefix of the delimiter (possibly none).
// The data bytes in front of the (partial) delimiter are not reported here;
// the caller derives them from how far `bytes` advanced.
void MultipartParser::ParseDataAndDelimiter(base::span<const char>& bytes) {
  DCHECK_EQ(0u, matcher_.NumMatchedBytes());

  auto hit = std::ranges::search(bytes, delimiter_);
  if (hit.begin() == bytes.end()) {
    // There is no complete delimiter in this chunk, but the chunk may end
    // with the beginning of one. Try the longest possible tail first and
    // shrink it from the front until a tail matches or nothing is left.
    const size_t longest_tail =
        std::min(static_cast<size_t>(delimiter_.size() - 1u), bytes.size());
    for (auto tail = bytes.last(longest_tail); !tail.empty();
         tail = tail.subspan(1u)) {
      if (matcher_.Match(tail)) {
        break;
      }
      // Start over from the beginning of the delimiter for the next,
      // shorter tail.
      matcher_.SetNumMatchedBytes(0u);
    }
    // Whether or not a partial delimiter was found, every byte of the chunk
    // has been dealt with: the bytes before a partial delimiter are data and
    // the partial delimiter itself is remembered by the matcher.
    bytes = {};
  } else {
    // A complete delimiter was found; whatever precedes it is data.
    const size_t delimiter_offset =
        static_cast<size_t>(hit.begin() - bytes.begin());
    auto from_delimiter = bytes.subspan(delimiter_offset);
    auto [delimiter, rest] = from_delimiter.split_at(delimiter_.size());
    // Run the delimiter through the matcher so that it ends up complete.
    const bool matched = matcher_.Match(delimiter);
    DCHECK(matched);
    DCHECK(matcher_.IsMatchComplete());
    bytes = rest;
  }

  DCHECK(matcher_.IsMatchComplete() || bytes.empty());
}
// Continues matching from the matcher's current position against the front
// of `bytes`. The matcher must not be complete on entry (DCHECKed).
//
// Matching stops at the first byte that does not match, at the end of the
// input, or as soon as the match is complete. `bytes` is advanced past the
// bytes that matched; a mismatching byte is left in place.
void MultipartParser::ParseDelimiter(base::span<const char>& bytes) {
  DCHECK(!matcher_.IsMatchComplete());

  size_t consumed = 0;
  for (const char c : bytes) {
    if (!matcher_.Match(c)) {
      break;
    }
    ++consumed;
    if (matcher_.IsMatchComplete()) {
      break;
    }
  }
  bytes = bytes.subspan(consumed);
}
// Tries to parse the header fields of a part from `bytes`, prepended with
// whatever earlier calls had to buffer.
//
// Outcomes:
//   - `bytes` is larger than wtf_size_t can express: returns false and leaves
//     `bytes` untouched.
//   - ParseMultipartFormHeadersFromBody() fails (in this file that is treated
//     as "header block not complete yet"): the input is kept in
//     `buffered_header_bytes_` for the next call, `bytes` is emptied and
//     false is returned.
//   - It succeeds: the buffer is cleared, `bytes` is advanced to the input
//     that follows the header block and true is returned. `header_fields`
//     is passed to the helper to be filled in.
bool MultipartParser::ParseHeaderFields(base::span<const char>& bytes,
                                        HTTPHeaderMap* header_fields) {
  if (bytes.size() > std::numeric_limits<wtf_size_t>::max()) {
    return false;
  }

  // If an earlier call left bytes behind, parse the concatenation of those
  // bytes and the current ones; otherwise parse the current bytes directly.
  const bool had_buffered_bytes = !buffered_header_bytes_.empty();
  base::span<const char> candidate = bytes;
  if (had_buffered_bytes) {
    buffered_header_bytes_.AppendSpan(bytes);
    candidate = buffered_header_bytes_;
  }

  wtf_size_t end = 0u;
  const bool parsed = ParseMultipartFormHeadersFromBody(
      base::as_bytes(candidate), header_fields, &end);
  if (parsed) {
    // `end` is used as the number of bytes of `candidate` that made up the
    // header block; the rest of `candidate` is the tail of the current input.
    const size_t remaining_size = candidate.size() - end;
    buffered_header_bytes_.clear();
    bytes = bytes.last(remaining_size);
    return true;
  }

  // Keep the bytes for the next call. If they were appended to the buffer
  // above, that has already been done.
  if (!had_buffered_bytes) {
    buffered_header_bytes_.AppendSpan(candidate);
  }
  bytes = {};
  return false;
}
// Skips transport padding, i.e. any run of spaces and horizontal tabs at the
// front of `bytes`. `bytes` is advanced to the first byte that is neither, or
// emptied if it consists of padding only.
void MultipartParser::ParseTransportPadding(
    base::span<const char>& bytes) const {
  size_t padding_length = 0;
  for (const char c : bytes) {
    if (c != ' ' && c != '\t') {
      break;
    }
    ++padding_length;
  }
  bytes = bytes.subspan(padding_length);
}
// Reports `client_`, the only member traced here, to `visitor`.
void MultipartParser::Trace(Visitor* visitor) const {
visitor->Trace(client_);
}
} // namespace blink
|