// Copyright 2021 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "services/network/orb/orb_impl.h"
#include "base/check.h"
#include "base/check_deref.h"
#include "base/metrics/histogram_functions.h"
#include "base/rand_util.h"
#include "base/strings/string_number_conversions.h"
#include "base/strings/string_util.h"
#include "net/base/mime_sniffer.h"
#include "net/http/http_util.h"
#include "net/url_request/url_request.h"
#include "services/network/orb/orb_mimetypes.h"
#include "services/network/orb/orb_sniffers.h"
#include "services/network/public/cpp/features.h"
#include "services/network/public/cpp/resource_request.h"
#include "services/network/public/mojom/url_response_head.mojom.h"
using Decision = network::orb::ResponseAnalyzer::Decision;
namespace network::orb {
namespace {
// Returns true when `mime_type` names an image format that is not recognized
// by net::SniffMimeTypeFromLocalData.  Such images cannot be identified from
// their bytes, so this predicate helps to allow them based on the MIME type.
// The comparison ignores ASCII case.
//
// TODO(lukasza): Once full Javascript sniffing is implemented, we may start
// to undesirably block future (=unsniffable) image formats.  We should
// explicitly recognize MIME types of such image formats below.  See also
// https://github.com/annevk/orb/issues/3#issuecomment-974334651
bool IsNonSniffableImageMimeType(std::string_view mime_type) {
  constexpr std::string_view kSvgMimeType = "image/svg+xml";
  return base::EqualsCaseInsensitiveASCII(mime_type, kSvgMimeType);
}
// Returns true when this ORB implementation treats `mime_type` as an
// audio/video type: any type starting with "audio/" or "video/", plus the
// handful of media-related types listed in the body.  All comparisons ignore
// ASCII case.
//
// TODO(lukasza): Restrict this to only known, non-sniffable audio/video types
// (hopefully we can reach agreement on this approach + document this in ORB
// spec).  See also https://github.com/annevk/orb/issues/3.  Notes:
// - In the long-term (once Javascript sniffing is implemented) this will
//   prevent non-webby images (e.g. image/vnd.adobe.photoshop) from being
//   unnecessarily allowed by ORB.
// - In the short-term this shouldn't matter for security of 200 responses
//   (with only HTML/XML/JSON sniffing current implementation wouldn't block
//   such non-webby images anyway).
// - The current implementation reduces risk of blocking range requests for
//   A) non-sniffable types and B) range responses for middle-of-resource
//   when first-bytes-response wasn't seen earlier.
bool IsAudioOrVideoMimeType(std::string_view mime_type) {
  static constexpr std::string_view kMediaPrefixes[] = {"audio/", "video/"};
  for (std::string_view prefix : kMediaPrefixes) {
    if (base::StartsWith(mime_type, prefix,
                         base::CompareCase::INSENSITIVE_ASCII)) {
      return true;
    }
  }

  static constexpr std::string_view kExtraMediaTypes[] = {
      // Special-casing "application/ogg" is a minor departure from the spec
      // when IsAudioOrVideoMimeType is called from IsOpaqueSafelistedMimeType.
      // OTOH, covering it here helps implement step 7 from ORB (sniffing
      // audio/video in the OpaqueResponseBlockingAnalyzer::Sniff method
      // below) because net::SniffMimeTypeFromLocalData may return
      // "application/ogg".
      "application/ogg",

      // TODO(lukasza): Address this departure from the spec (which doesn't
      // explicitly mention DASH and other MIME types here).  The current
      // implementation enforces strict MIME types for DASH/HLS resources - if
      // this can ship without too much of web-compatibility issues, then we
      // should modify ORB spec to match this implementation.  If there is too
      // much web-compatibility risk, then ORB might need to fully parse
      // DASH/HLS manifests.
      "application/dash+xml",
      "application/vnd.apple.mpegurl",
      "text/vtt",
  };
  for (std::string_view media_type : kExtraMediaTypes) {
    if (base::EqualsCaseInsensitiveASCII(mime_type, media_type)) {
      return true;
    }
  }

  return false;
}
// Returns true when `mime_type` equals "text/css", ignoring ASCII case.
bool IsTextCssMimeType(std::string_view mime_type) {
  constexpr std::string_view kCssMimeType = "text/css";
  return base::EqualsCaseInsensitiveASCII(mime_type, kCssMimeType);
}
// ORB spec says that "An opaque-safelisted MIME type" is a JavaScript MIME type
// or a MIME type whose essence is "text/css" or "image/svg+xml".
//
// This implementation answers true for, in order:
// - "text/css" (per the spec),
// - "image/svg+xml" (per the spec),
// - audio/video types.  TODO(vogelheim): Departure from the spec - see the
//   comment in IsAudioOrVideoMimeType for more details.
//
// Deviation from spec: JavaScript MIME types are not handled here.  See the
// comments at IsOpaqueSafelistedMimeTypeThatWeSniffAnyway and the
// IsOpaqueSafelistedMimeType call site for details.
bool IsOpaqueSafelistedMimeType(std::string_view mime_type) {
  return IsTextCssMimeType(mime_type) ||
         IsNonSniffableImageMimeType(mime_type) ||
         IsAudioOrVideoMimeType(mime_type);
}
// ORB spec defines "an opaque-safelisted MIME type".  Until we have full ORB
// compliance, we'll need to handle some MIME types differently and run the
// JavaScript-parser-breaker sniffer from CORB on these resources.
//
// Returns true for JavaScript MIME types.  Per the spec these are
// opaque-safelisted, but this implementation handles them in
// HandleEndOfSniffableResponseBody instead.
bool IsOpaqueSafelistedMimeTypeThatWeSniffAnyway(std::string_view mime_type) {
  return IsJavascriptMimeType(mime_type);
}
// This corresponds to https://fetch.spec.whatwg.org/#ok-status
//
// Returns true when `response` has headers and its status code lies in the
// inclusive range [200, 299].  A response without headers is never "ok".
bool IsOkayHttpStatus(const mojom::URLResponseHead& response) {
  constexpr int kLowestOkStatus = 200;
  constexpr int kHighestOkStatus = 299;

  if (response.headers) {
    const int status = response.headers->response_code();
    return status >= kLowestOkStatus && status <= kHighestOkStatus;
  }
  return false;
}
// Returns true when `response` has headers and its status code is exactly
// `expected_status_code`.  A response without headers never matches.
bool IsHttpStatus(const mojom::URLResponseHead& response,
                  int expected_status_code) {
  return response.headers &&
         response.headers->response_code() == expected_status_code;
}
// Returns true when `response` is a 206 response whose "content-range" header
// parses successfully and starts past the first byte of the resource (i.e.
// the first byte position is greater than zero).  Missing headers, a status
// other than 206, a missing "content-range" header or an unparsable one all
// yield false.
bool IsRangeResponseWithMiddleOfResource(
    const mojom::URLResponseHead& response) {
  if (!response.headers || !IsHttpStatus(response, 206)) {
    return false;
  }

  const std::optional<std::string> content_range =
      response.headers->GetNormalizedHeader("content-range");
  if (!content_range.has_value()) {
    return false;
  }

  // Only the first byte position matters for the result; the other two
  // out-params are required by the parser's signature.
  int64_t first_byte = -1;
  int64_t last_byte = -1;
  int64_t total_length = -1;
  const bool parsed_ok = net::HttpUtil::ParseContentRangeHeaderFor206(
      content_range.value(), &first_byte, &last_byte, &total_length);
  return parsed_ok && first_byte > 0;
}
// Returns true when ORB applies to `response`, i.e. when the response is
// opaque to the initiator.  The response is NOT considered opaque when:
// - the request mode is anything other than "no-cors",
// - the request has no initiator (browser-initiated requests are never
//   opaque), or
// - a service worker provided a response whose type is not opaque.
bool IsOpaqueResponse(const std::optional<url::Origin>& request_initiator,
                      mojom::RequestMode request_mode,
                      const mojom::URLResponseHead& response) {
  const bool is_no_cors = request_mode == mojom::RequestMode::kNoCors;
  if (!is_no_cors || !request_initiator.has_value()) {
    return false;
  }

  if (!response.was_fetched_via_service_worker) {
    return true;
  }

  // Requests from foo.example.com will consult foo.example.com's service worker
  // first (if one has been registered).  The service worker can handle requests
  // initiated by foo.example.com even if they are cross-origin (e.g. requests
  // for bar.example.com).  This is okay, because there is no security boundary
  // between foo.example.com and the service worker of foo.example.com + because
  // the response data is "conjured" within the service worker of
  // foo.example.com (rather than being fetched from bar.example.com).
  // Therefore such responses should not be blocked by CORB, unless the
  // initiator opted out of CORS / opted into receiving an opaque response.  See
  // also https://crbug.com/803672.
  switch (response.response_type) {
    case network::mojom::FetchResponseType::kBasic:
    case network::mojom::FetchResponseType::kCors:
    case network::mojom::FetchResponseType::kDefault:
    case network::mojom::FetchResponseType::kError:
      // Non-opaque responses shouldn't be blocked.
      return false;
    case network::mojom::FetchResponseType::kOpaque:
    case network::mojom::FetchResponseType::kOpaqueRedirect:
      // Opaque responses are eligible for blocking.
      return true;
  }

  // A value outside the enumerators above stays eligible for blocking.
  return true;
}
// Returns true when `response` carries an "x-content-type-options" header
// whose normalized value equals "nosniff" (ignoring ASCII case).  Missing
// headers or a missing header value yield false.
//
// TODO(vogelheim): Check for compatibility with spec &
// ParseContentTypeOptionsHeader.  Maybe move this to parsed_headers.
bool HasNoSniff(const mojom::URLResponseHead& response) {
  if (!response.headers) {
    return false;
  }

  const std::optional<std::string> options_value =
      response.headers->GetNormalizedHeader("x-content-type-options");
  return options_value.has_value() &&
         base::EqualsCaseInsensitiveASCII(*options_value, "nosniff");
}
} // namespace
// Creates an analyzer bound to `state`, the store that
// StoreAllowedAudioVideoRequest() inserts URLs into and that
// IsAllowedAudioVideoRequest() queries.
//
// `state` must be non-null.  CHECK_DEREF verifies this *before* the pointer
// is dereferenced; dereferencing first and checking afterwards in the
// constructor body would already be undefined behaviour for a null `state`.
// NOTE(review): `state` presumably has to outlive this analyzer - confirm
// against the owner of PerFactoryState.
OpaqueResponseBlockingAnalyzer::OpaqueResponseBlockingAnalyzer(
    PerFactoryState* state)
    : per_factory_state_(CHECK_DEREF(state)) {}
// Nothing to clean up explicitly.
//
// TODO(crbug.com/40169301): Add UMA tracking the size of ORB state
// from `per_factory_state_`.
OpaqueResponseBlockingAnalyzer::~OpaqueResponseBlockingAnalyzer() = default;
// Makes the initial ORB decision for a response, using only request
// properties and response headers.  Returns kAllow or kBlock when those are
// conclusive, and kSniffMore when the response body has to be looked at.
//
// As a side effect this caches the request/response properties that later
// steps read (status, emptiness, attribution headers, URL, destination, MIME
// type, nosniff) and records `blocking_decision_reason_` when blocking.
// Numbered comments refer to steps of the ORB algorithm.
Decision OpaqueResponseBlockingAnalyzer::Init(
    const GURL& request_url,
    const std::optional<url::Origin>& request_initiator,
    mojom::RequestMode request_mode,
    mojom::RequestDestination request_destination_from_renderer,
    const network::mojom::URLResponseHead& response) {
  // ORB doesn't apply to non-opaque responses.
  if (!IsOpaqueResponse(request_initiator, request_mode, response)) {
    return Decision::kAllow;
  }
  DCHECK(request_initiator.has_value());

  // Same-origin requests are allowed (the ORB spec doesn't explicitly deal with
  // this, because it assumes that the Fetch spec has already determined that
  // the request is cross-origin, before handing off to ORB).
  if (request_initiator->IsSameOriginWith(request_url)) {
    return Decision::kAllow;
  }

  // Remember request properties that will be needed later.
  const auto& headers = response.headers;
  is_http_status_okay_ = IsOkayHttpStatus(response);

  const bool has_no_content_status =
      headers && headers->response_code() == 204;
  if (response.content_length == 0 || has_no_content_status) {
    is_empty_response_ = true;
  }

  if (headers) {
    static constexpr const char* kAttributionHeaders[] = {
        "Attribution-Reporting-Register-Source",
        "Attribution-Reporting-Register-Trigger",
        "Attribution-Reporting-Register-OS-Source",
        "Attribution-Reporting-Register-OS-Trigger",
    };
    for (const char* header_name : kAttributionHeaders) {
      if (headers->HasHeader(header_name)) {
        is_attribution_response_ = true;
        break;
      }
    }
  }

  // TODO(lukasza): Consider tweaking how `final_request_url_` is used to
  // properly handle interactions between redirects and range requests.  For
  // example, ORB might sniff an initial a.com/a1 -> a.com/a2 redirect as media
  // which should allow future range requests to the "same" resource.  But what
  // if in the future something like load-balancing kicks-in and a.com/a1 ->
  // a.com/a3 redirect happens instead?  This might require remembering that not
  // just a2, but also a1 is safe.  Similar considerations (checking all
  // consecutive, same-origin redirect hops) apply both to the initial request
  // (deciding which URLs from the redirect chain to store as validated as
  // media) and to the subsequent range requests (deciding which URLs from the
  // chain to validate against the ones in the store of validated URLs).
  final_request_url_ = request_url;
  request_destination_from_renderer_ = request_destination_from_renderer;

  // 1. Let mimeType be the result of extracting a MIME type from response's
  //    header list.
  if (headers) {
    headers->GetMimeType(&mime_type_);
  }

  // 2. Let nosniff be the result of determining nosniff given response's header
  //    list.
  is_no_sniff_header_present_ = HasNoSniff(response);

  // 3. If mimeType is not failure, then:
  const bool has_mime_type = !mime_type_.empty();
  if (has_mime_type) {
    // 3.i. If mimeType is an opaque-safelisted MIME type, then return true.
    //
    // Because "ORB v0.1" does not have a JSON/JS parser step, we will not
    // consider JS resources here and instead employ JSON-or-JS-parser-breaker
    // sniffer on these resources.  This means that for JS resources, step 3.i.
    // from ORB is postponed until HandleEndOfSniffableResponseBody, instead of
    // being handled here.
    //
    // Whether ORB spec can adopt this behavior is being discussed in
    // https://github.com/annevk/orb/issues/30.
    //
    // TODO(vogelheim/lukasza): Resolve this difference from the ORB spec.
    // TODO(vogelheim/lukasza): Consider other early-allow mechanisms (e.g. CORP
    // - see https://github.com/annevk/orb/issues/30#issuecomment-971373842).
    if (IsOpaqueSafelistedMimeType(mime_type_)) {
      return Decision::kAllow;
    }

    // ii. If mimeType is an opaque-blocklisted-never-sniffed MIME type, then
    //     return false.
    // iv. If nosniff is true and mimeType is an opaque-blocklisted MIME type or
    //     its essence is "text/plain", then return false.
    //
    // Step iii. is missing - this is departure from how full ORB handles 206
    // responses labeled as html/json/xml.  This seems okay given that we
    // tighten our implementation of step 4 below (handling of range requests).
    switch (GetCanonicalMimeType(mime_type_)) {
      case MimeType::kNeverSniffed:
        // Step ii.
        blocking_decision_reason_ =
            BlockingDecisionReason::kNeverSniffedMimeType;
        return Decision::kBlock;

      case MimeType::kHtml:
      case MimeType::kJson:
      case MimeType::kPlain:
      case MimeType::kXml:
        // Step iv.
        if (is_no_sniff_header_present_) {
          blocking_decision_reason_ = BlockingDecisionReason::kNoSniffHeader;
          return Decision::kBlock;
        }
        break;

      case MimeType::kOthers:
        // TODO(vogelheim/lukasza): Departure from the spec: We currently
        // handle audio/video MIME types as "opaque safelisted", to prevent
        // sniffing on them and on XML-based media types in particular.
        CHECK(!IsAudioOrVideoMimeType(mime_type_));
        break;

      case MimeType::kInvalidMimeType:
        break;
    }
  }

  // 4. If request's no-cors media request state is "subsequent", then return
  //    true.
  //
  // TODO(lukasza): Departure from the spec:
  // Diff from the (blocking) step 3.iii.:
  // - Moved slightly later
  // - No extra conditions like "and mimeType is an opaque-blocklisted MIME
  //   type" (e.g. html, xml, or json).
  // Diff from the (allowing) step 4.:
  // - Only applying this step to IsRangeResponseWithMiddleOfResource cases
  if (IsRangeResponseWithMiddleOfResource(response)) {
    if (IsAllowedAudioVideoRequest(request_url)) {
      return Decision::kAllow;
    }
    blocking_decision_reason_ =
        BlockingDecisionReason::kUnexpectedRangeResponse;
    return Decision::kBlock;
  }

  // 5. Wait for 1024 bytes of response or end-of-file, whichever comes first
  //    and let bytes be those bytes.
  return Decision::kSniffMore;
}
// Decides based on the response bytes in `data`.  Returns:
// - kAllow when the bytes sniff as audio/video (the URL is then also stored
//   as an allowed media request), when they sniff as an image, or when the
//   response had no MIME type;
// - kBlock (recording `blocking_decision_reason_`) when the bytes sniff as
//   HTML, XML, or a JSON / JS-parser-breaker resource;
// - kSniffMore otherwise.
// Numbered comments refer to steps of the ORB algorithm.
Decision OpaqueResponseBlockingAnalyzer::Sniff(std::string_view data) {
  std::string detected_mime_type;
  net::SniffMimeTypeFromLocalData(data, &detected_mime_type);

  // 7. If the audio or video type pattern matching algorithm given bytes does
  //    not return undefined, then:
  if (IsAudioOrVideoMimeType(detected_mime_type)) {
    // i. Append (request's opaque media identifier, request's current URL) to
    //    the user agent's opaque-safelisted requesters set.
    StoreAllowedAudioVideoRequest(final_request_url_);
    // ii. Return true.
    return Decision::kAllow;
  }

  // Spec-divergence: no step 8:
  //   8. If requests's no-cors media request state is not "N/A", then return
  //      false.
  // This implementation doesn't know if the request came from a media element
  // or not.  Making the decision based on earlier sniffing should be okay.

  // 9. If the image type pattern matching algorithm given bytes does not
  //    return undefined, then return true.
  const bool sniffed_as_image = base::StartsWith(
      detected_mime_type, "image/", base::CompareCase::INSENSITIVE_ASCII);
  if (sniffed_as_image) {
    return Decision::kAllow;
  }

  // At this point, a number of MIME types should be out of the running:
  // opaque-safelisted MIME types are not sniffed.
  CHECK(!IsTextCssMimeType(mime_type_));
  CHECK(!IsAudioOrVideoMimeType(mime_type_));
  CHECK(!IsNonSniffableImageMimeType(mime_type_));

  // 12. If mimeType is failure, then return true.
  //
  // The spec proposal handles this step before checking for JS and JSON.  To
  // be compatible, we handle this before our 'sniffing' steps that handle
  // those formats.
  //
  // TODO(lukasza): This is not fully accurate - it doesn't capture all the
  // possible failure modes of
  // https://fetch.spec.whatwg.org/#concept-header-extract-mime-type
  if (mime_type_.empty()) {
    return Decision::kAllow;
  }

  // Records why the response is blocked and yields the blocking decision.
  const auto block_because = [this](BlockingDecisionReason reason) {
    blocking_decision_reason_ = reason;
    return Decision::kBlock;
  };

  // Check if the response is HTML, XML, or JSON, in which case it is surely not
  // JavaScript.  (The sniffers account for HTML/JS polyglot cases - see
  // https://crbug.com/839945 and https://crbug.com/839425.  OTOH, the sniffers
  // do not account for CSS/HTML or CSS/JS-parser-breakers polyglots so CSS is
  // explicitly excluded from the sniffing below.)
  //
  // TODO(lukasza): Departure from the spec.  This avoids having to sniff
  // Javascript in the full response as described in the "Gradual CORB -> ORB
  // transition" doc at
  // https://docs.google.com/document/d/1qUbE2ySi6av3arUEw5DNdFJIKKBbWGRGsXz_ew3S7HQ/edit?usp=sharing
  // Diff: This is a new sniffing step for the 1st 1024 bytes.
  // Diff: This doesn't sniff for JavaScript, but for non-Html/Xml/Json.
  if (SniffForHTML(data) == SniffingResult::kYes) {
    return block_because(BlockingDecisionReason::kSniffedAsHtml);
  }
  if (SniffForXML(data) == SniffingResult::kYes) {
    return block_because(BlockingDecisionReason::kSniffedAsXml);
  }
  // Check for JSON and JS parser breakers.
  if (SniffForFetchOnlyResource(data) == SniffingResult::kYes) {
    return block_because(BlockingDecisionReason::kSniffedAsJson);
  }

  return Decision::kSniffMore;
}
// Makes the final decision once sniffing of the response body has finished.
// The current implementation fails open: every path returns kAllow.
Decision OpaqueResponseBlockingAnalyzer::HandleEndOfSniffableResponseBody() {
  // Deviation from spec: We run JSON-or-JS-parser-breaker sniffer on some
  // MIME types.  To do so, we have taken them out of IsOpaqueSafelistedMimeType
  // and instead handle them here.  So this effectively handles some cases
  // the spec handles in step 3.i.
  //
  // TODO(vogelheim/lukasza): Resolve this difference from the ORB spec.
  // TODO(vogelheim/lukasza): Consider other early-allow mechanisms (e.g. CORP -
  // see https://github.com/annevk/orb/issues/30#issuecomment-971373842).
  const bool is_postponed_safelisted_type =
      IsOpaqueSafelistedMimeTypeThatWeSniffAnyway(mime_type_);
  if (is_postponed_safelisted_type) {
    return Decision::kAllow;
  }

  // TODO(lukasza): Implement the following steps from ORB spec:
  //   10. If nosniff is true, then return false.
  //   11. If response's status is not an ok status, then return false.
  // (Skipping these steps minimizes the risk of shipping the initial ORB
  // implementation.)

  // TODO(lukasza): Departure from the spec discussed in
  // https://github.com/annevk/orb/issues/3.
  // Diff: Removing step 13:
  //   13. If mimeType's essence starts with "audio/", "image/", or "video/",
  //       then return false.

  // TODO(lukasza): Departure from the spec, because the current implementation
  // avoids full Javascript parsing as described in the "Gradual CORB -> ORB
  // transition" doc at
  // https://docs.google.com/document/d/1qUbE2ySi6av3arUEw5DNdFJIKKBbWGRGsXz_ew3S7HQ/edit?usp=sharing
  // Diff: Skipping/ignoring step 15:
  //   15. If response's body parses as JavaScript and does not parse as JSON,
  //       then return true.
  // Diff: Changing step 16 to fail open (e.g. return true / kAllow):
  //   16. Return false.
  return Decision::kAllow;
}
// Returns whether a blocked response should be reported: always for
// attribution responses, otherwise only for non-empty responses with an ok
// HTTP status.
bool OpaqueResponseBlockingAnalyzer::ShouldReportBlockedResponse() const {
  // Empty attribution responses may still result in changes to web-visible
  // behavior when blocked, so they should always be reported.  See
  // https://crbug.com/1369637.
  if (is_attribution_response_) {
    return true;
  }
  return is_http_status_okay_ && !is_empty_response_;
}
// Chooses how a blocked response is surfaced to the requester:
// - "ORB v0.1" uses CORB-style error handling with injecting an empty
//   response.
// - "ORB v0.2" uses ORB-specified error handling (injecting a network error)
//   for non-script fetches.
// - "ORB errors-for-all-fetches" uses ORB-specified error handling everywhere.
//
// Concretely: a network error is used when the
// kOpaqueResponseBlockingErrorsForAllFetches feature is enabled or when the
// renderer-provided request destination is not kEmpty; otherwise an empty
// response is used.
ResponseAnalyzer::BlockedResponseHandling
OpaqueResponseBlockingAnalyzer::ShouldHandleBlockedResponseAs() const {
  const bool use_network_error =
      base::FeatureList::IsEnabled(
          features::kOpaqueResponseBlockingErrorsForAllFetches) ||
      request_destination_from_renderer_ != mojom::RequestDestination::kEmpty;
  return use_network_error ? BlockedResponseHandling::kNetworkError
                           : BlockedResponseHandling::kEmptyResponse;
}
void OpaqueResponseBlockingAnalyzer::StoreAllowedAudioVideoRequest(
const GURL& media_url) {
per_factory_state_->insert(media_url);
}
bool OpaqueResponseBlockingAnalyzer::IsAllowedAudioVideoRequest(
const GURL& media_url) {
return per_factory_state_->contains(media_url);
}
} // namespace network::orb