1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389
|
// Copyright 2017 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// ReportingService handles uploading serialized logs to a server.
#include "components/metrics/reporting_service.h"
#include <cstdio>
#include <memory>
#include <string_view>
#include "base/base64.h"
#include "base/command_line.h"
#include "base/functional/bind.h"
#include "base/functional/callback.h"
#include "base/logging.h"
#include "base/strings/string_number_conversions.h"
#include "base/strings/stringprintf.h"
#include "components/metrics/data_use_tracker.h"
#include "components/metrics/log_store.h"
#include "components/metrics/metrics_features.h"
#include "components/metrics/metrics_log_uploader.h"
#include "components/metrics/metrics_service_client.h"
#include "components/metrics/metrics_upload_scheduler.h"
namespace metrics {
// static
void ReportingService::RegisterPrefs(PrefRegistrySimple* registry) {
DataUseTracker::RegisterPrefs(registry);
}
ReportingService::ReportingService(MetricsServiceClient* client,
PrefService* local_state,
size_t max_retransmit_size,
MetricsLogsEventManager* logs_event_manager)
: client_(client),
local_state_(local_state),
max_retransmit_size_(max_retransmit_size),
logs_event_manager_(logs_event_manager),
reporting_active_(false),
log_upload_in_progress_(false),
data_use_tracker_(DataUseTracker::Create(local_state)) {
DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
DCHECK(client_);
DCHECK(local_state);
}
ReportingService::~ReportingService() {
DisableReporting();
}
void ReportingService::Initialize() {
DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
DCHECK(!upload_scheduler_);
log_store()->LoadPersistedUnsentLogs();
base::RepeatingClosure send_next_log_callback = base::BindRepeating(
&ReportingService::SendNextLog, self_ptr_factory_.GetWeakPtr());
bool fast_startup = client_->ShouldStartUpFast();
upload_scheduler_ = std::make_unique<MetricsUploadScheduler>(
send_next_log_callback, fast_startup);
}
void ReportingService::Start() {
DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
if (reporting_active_) {
upload_scheduler_->Start();
}
}
void ReportingService::Stop() {
DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
if (upload_scheduler_) {
upload_scheduler_->Stop();
}
}
void ReportingService::EnableReporting() {
DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
if (reporting_active_)
return;
reporting_active_ = true;
Start();
}
void ReportingService::DisableReporting() {
DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
reporting_active_ = false;
Stop();
}
bool ReportingService::reporting_active() const {
DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
return reporting_active_;
}
#if BUILDFLAG(IS_ANDROID) || BUILDFLAG(IS_IOS)
void ReportingService::OnAppEnterBackground() {
is_in_foreground_ = false;
}
void ReportingService::OnAppEnterForeground() {
is_in_foreground_ = true;
#if BUILDFLAG(IS_ANDROID)
// Starting from Android 15, network requests initiated outside of a valid
// process lifecycle (e.g. in our case, while the app is in the background)
// will receive an exception:
// https://developer.android.com/about/versions/15/behavior-changes-all#background-network-access
// From our side, this manifests as the uploads failing with various errors
// (105 NAME_NOT_RESOLVED, 103 CONNECTION_ABORTED, 118 CONNECTION_TIMED_OUT,
// etc.). We have backoff logic to retry the uploads at increasingly long
// intervals when such errors are encountered -- with the assumption that
// something is currently wrong with the server -- but this is not true in
// this case. We should instead retry when the user foregrounds. Otherwise,
// the next upload attempt may unnecessarily get scheduled very far in the
// future (up to 24h) if the user leaves the app in the background for a long
// time, e.g. overnight. This has the side effect that when they actually
// start using Chrome again, logs don't get created periodically anymore. And
// although logs may be created through other means (e.g. upon backgrounding
// or foregrounding), they don't get uploaded because the next attempt is
// scheduled far in the future, which in turn results in an accumulation of
// logs on the device, which in turn results in logs being trimmed, which in
// turn results in data loss.
// See crbug.com/420459511.
// There are two scenarios of interest when the user foregrounds.
// First, the user foregrounds while we're waiting for the next scheduled
// upload (which may be far in the future because of the backoff logic). This
// is handled here -- we trigger the upload right away instead.
// Second, the user foregrounds while an upload initiated from the background
// is in progress (it can take a while before the request fails). In those
// cases, when the upload eventually fails, the upload will be re-scheduled,
// but we don't want it to use backoff interval logic since uploads should now
// start succeeding -- this is handled in OnLogUploadComplete() below.
if (upload_scheduler_ && upload_scheduler_->IsRunning() &&
!log_upload_in_progress_ &&
failures_started_from_background_.value_or(false) &&
base::FeatureList::IsEnabled(
features::kResetMetricsUploadBackoffOnForeground)) {
upload_scheduler_->RestartWithUnsentLogsInterval();
}
#endif // BUILDFLAG(IS_ANDROID)
}
#endif // BUILDFLAG(IS_ANDROID) || BUILDFLAG(IS_IOS)
//------------------------------------------------------------------------------
// private methods
//------------------------------------------------------------------------------
void ReportingService::SendNextLog() {
DVLOG(1) << "SendNextLog";
DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
const base::TimeTicks now = base::TimeTicks::Now();
LogActualUploadInterval(last_upload_finish_time_.is_null()
? base::TimeDelta()
: now - last_upload_finish_time_);
last_upload_finish_time_ = now;
if (!reporting_active()) {
upload_scheduler_->StopAndUploadCancelled();
return;
}
if (!log_store()->has_unsent_logs()) {
// Should only get here if serializing the log failed somehow.
upload_scheduler_->Stop();
// Reset backoff interval
upload_scheduler_->UploadFinished(true);
return;
}
if (!log_store()->has_staged_log()) {
reporting_info_.set_attempt_count(0);
log_store()->StageNextLog();
}
// Check whether the log should be uploaded based on user id. If it should not
// be sent, then discard the log from the store and notify the scheduler.
auto staged_user_id = log_store()->staged_log_user_id();
if (staged_user_id.has_value() &&
!client_->ShouldUploadMetricsForUserId(staged_user_id.value())) {
// Remove the log and update list to disk.
log_store()->DiscardStagedLog();
log_store()->TrimAndPersistUnsentLogs(/*overwrite_in_memory_store=*/true);
// Notify the scheduler that the next log should be uploaded. If there are
// no more logs, then stop the scheduler.
if (!log_store()->has_unsent_logs()) {
DVLOG(1) << "Stopping upload_scheduler_.";
upload_scheduler_->Stop();
}
upload_scheduler_->UploadFinished(true);
return;
}
// Proceed to stage the log for upload if log size satisfies cellular log
// upload constrains.
bool upload_canceled = false;
bool is_cellular_logic = client_->IsOnCellularConnection();
if (is_cellular_logic && data_use_tracker_ &&
!data_use_tracker_->ShouldUploadLogOnCellular(
log_store()->staged_log().size())) {
upload_scheduler_->UploadOverDataUsageCap();
upload_canceled = true;
} else {
SendStagedLog();
}
if (is_cellular_logic) {
LogCellularConstraint(upload_canceled);
}
}
void ReportingService::SendStagedLog() {
DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
DCHECK(log_store()->has_staged_log());
if (!log_store()->has_staged_log())
return;
CHECK(!log_upload_in_progress_);
log_upload_in_progress_ = true;
#if BUILDFLAG(IS_ANDROID)
// Keep track of whether the upload was initiated from the background for the
// backoff reset logic (see feature kResetMetricsUploadBackoffOnForeground).
CHECK(!log_upload_initiated_from_background_.has_value());
log_upload_initiated_from_background_ = !is_in_foreground_;
#endif // BUILDFLAG(IS_ANDROID)
if (!log_uploader_) {
log_uploader_ = client_->CreateUploader(
GetUploadUrl(), GetInsecureUploadUrl(), upload_mime_type(),
service_type(),
base::BindRepeating(&ReportingService::OnLogUploadComplete,
self_ptr_factory_.GetWeakPtr()));
}
reporting_info_.set_attempt_count(reporting_info_.attempt_count() + 1);
const std::string hash = base::HexEncode(log_store()->staged_log_hash());
std::string signature =
base::Base64Encode(log_store()->staged_log_signature());
if (logs_event_manager_) {
logs_event_manager_->NotifyLogEvent(
MetricsLogsEventManager::LogEvent::kLogUploading,
log_store()->staged_log_hash());
}
log_uploader_->UploadLog(log_store()->staged_log(),
log_store()->staged_log_metadata(), hash, signature,
reporting_info_);
}
void ReportingService::OnLogUploadComplete(
int response_code,
int error_code,
bool was_https,
bool force_discard,
std::string_view force_discard_reason) {
DVLOG(1) << "OnLogUploadComplete:" << response_code;
DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
reporting_info_.set_last_response_code(response_code);
reporting_info_.set_last_error_code(error_code);
reporting_info_.set_last_attempt_was_https(was_https);
// Log a histogram to track response success vs. failure rates.
LogResponseOrErrorCode(response_code, error_code, was_https);
bool upload_succeeded = response_code == 200;
// Staged log could have been removed already (such as by Purge() in some
// implementations), otherwise we may remove it here.
if (log_store()->has_staged_log()) {
// Provide boolean for error recovery (allow us to ignore response_code).
bool discard_log = false;
std::string_view discard_reason;
const std::string& staged_log = log_store()->staged_log();
const size_t log_size = staged_log.length();
if (upload_succeeded) {
LogSuccessLogSize(log_size);
LogSuccessMetadata(staged_log);
discard_log = true;
discard_reason = "Log upload successful.";
} else if (force_discard) {
discard_log = true;
discard_reason = force_discard_reason;
} else if (log_size > max_retransmit_size_) {
LogLargeRejection(log_size);
discard_log = true;
discard_reason =
"Failed to upload, and log is too large. Will not attempt to "
"retransmit.";
} else if (response_code == 400) {
// Bad syntax. Retransmission won't work.
discard_log = true;
discard_reason =
"Failed to upload because log has bad syntax. Will not attempt to "
"retransmit.";
}
if (!discard_log && logs_event_manager_) {
// The log is not discarded, meaning that it has failed to upload. We will
// try to retransmit it.
logs_event_manager_->NotifyLogEvent(
MetricsLogsEventManager::LogEvent::kLogStaged,
log_store()->staged_log_hash(),
base::StringPrintf("Failed to upload (status code: %d, net error "
"code: %d). Staged again for retransmission.",
response_code, error_code));
}
if (discard_log) {
if (upload_succeeded)
log_store()->MarkStagedLogAsSent();
log_store()->DiscardStagedLog(discard_reason);
// Store the updated list to disk now that the removed log is uploaded.
log_store()->TrimAndPersistUnsentLogs(/*overwrite_in_memory_store=*/true);
bool flush_local_state =
base::FeatureList::IsEnabled(features::kReportingServiceAlwaysFlush);
#if BUILDFLAG(IS_ANDROID) || BUILDFLAG(IS_IOS)
// If Chrome is in the background, flush the discarded and trimmed logs
// from |local_state_| immediately because the process may be killed at
// any time from now without persisting the changes. Otherwise, we may end
// up re-uploading the same log in a future session. We do not do this if
// Chrome is in the foreground because of the assumption that
// |local_state_| will be flushed when convenient, and we do not want to
// do more work than necessary on the main thread while Chrome is visible.
flush_local_state = flush_local_state || !is_in_foreground_;
#endif // BUILDFLAG(IS_ANDROID) || BUILDFLAG(IS_IOS)
if (flush_local_state) {
local_state_->CommitPendingWrite();
}
}
}
// Error 400 indicates a problem with the log, not with the server, so
// don't consider that a sign that the server is in trouble. Similarly, if
// |force_discard| is true, do not delay the sending of other logs. For
// example, if |force_discard| is true because there are no metrics server
// URLs included in this build, do not indicate that the "non-existent server"
// is in trouble, which would delay the sending of other logs and causing the
// accumulation of logs on disk.
bool server_is_healthy =
upload_succeeded || response_code == 400 || force_discard;
if (!log_store()->has_unsent_logs()) {
DVLOG(1) << "Stopping upload_scheduler_.";
upload_scheduler_->Stop();
}
CHECK(log_upload_in_progress_);
log_upload_in_progress_ = false;
#if BUILDFLAG(IS_ANDROID)
// When `server_is_healthy` is false, representing a failure with the upload,
// then we will start using the backoff logic in `upload_scheduler_`. Keep
// track of if these failures only started happening while in the background.
// TODO: crbug.com/420459511: `server_is_healthy` is not accurate anymore,
// rename it here and other places.
if (!server_is_healthy) {
if (!failures_started_from_background_.has_value()) {
failures_started_from_background_ = log_upload_initiated_from_background_;
}
// Since Android 15, network requests initiated from the background will
// fail (see comment in `OnAppEnterForeground()` above for more details),
// even if the user foregrounds during the request. Don't use the backoff
// logic in this case since there's probably nothing wrong with the server
// (but only if the failures started happening from the background --
// otherwise, something wrong is probably going on).
if (*failures_started_from_background_ && is_in_foreground_ &&
base::FeatureList::IsEnabled(
features::kResetMetricsUploadBackoffOnForeground)) {
server_is_healthy = true;
}
} else {
failures_started_from_background_ = std::nullopt;
}
CHECK(log_upload_initiated_from_background_.has_value());
log_upload_initiated_from_background_ = std::nullopt;
#endif // BUILDFLAG(IS_ANDROID)
upload_scheduler_->UploadFinished(server_is_healthy);
}
} // namespace metrics
|