File: tflite_model_executor.h

package info (click to toggle)
chromium 138.0.7204.183-1~deb12u1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm-proposed-updates
  • size: 6,080,960 kB
  • sloc: cpp: 34,937,079; ansic: 7,176,967; javascript: 4,110,704; python: 1,419,954; asm: 946,768; xml: 739,971; pascal: 187,324; sh: 89,623; perl: 88,663; objc: 79,944; sql: 50,304; cs: 41,786; fortran: 24,137; makefile: 21,811; php: 13,980; tcl: 13,166; yacc: 8,925; ruby: 7,485; awk: 3,720; lisp: 3,096; lex: 1,327; ada: 727; jsp: 228; sed: 36
file content (610 lines) | stat: -rw-r--r-- 25,729 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
// Copyright 2021 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef COMPONENTS_OPTIMIZATION_GUIDE_CORE_TFLITE_MODEL_EXECUTOR_H_
#define COMPONENTS_OPTIMIZATION_GUIDE_CORE_TFLITE_MODEL_EXECUTOR_H_

#include <optional>

#include "base/files/file.h"
#include "base/functional/bind.h"
#include "base/functional/callback_forward.h"
#include "base/logging.h"
#include "base/metrics/histogram.h"
#include "base/metrics/histogram_functions.h"
#include "base/sequence_checker.h"
#include "base/task/sequenced_task_runner.h"
#include "base/task/thread_pool.h"
#include "base/time/time.h"
#include "base/timer/elapsed_timer.h"
#include "base/trace_event/trace_event.h"
#include "base/types/expected.h"
#include "components/optimization_guide/core/execution_status.h"
#include "components/optimization_guide/core/model_enums.h"
#include "components/optimization_guide/core/model_execution_timeout_watchdog.h"
#include "components/optimization_guide/core/model_executor.h"
#include "components/optimization_guide/core/model_util.h"
#include "components/optimization_guide/core/optimization_guide_features.h"
#include "components/optimization_guide/machine_learning_tflite_buildflags.h"
#include "third_party/tflite/src/tensorflow/lite/c/common.h"
#include "third_party/tflite_support/src/tensorflow_lite_support/cc/task/core/base_task_api.h"

namespace optimization_guide {

namespace {

// Util class for recording the result of a single model execution. The status
// is emitted exactly once, from the destructor, to the
// "OptimizationGuide.ModelExecutor.ExecutionStatus.<target name>" histogram,
// so callers only need to set the status before the recorder goes out of
// scope. If no status is ever set, `ExecutionStatus::kUnknown` is recorded.
class ScopedExecutionStatusResultRecorder {
 public:
  explicit ScopedExecutionStatusResultRecorder(
      proto::OptimizationTarget optimization_target)
      : optimization_target_(optimization_target) {}

  // Not copyable: every copy would emit its own histogram sample when
  // destroyed, counting one execution more than once.
  ScopedExecutionStatusResultRecorder(
      const ScopedExecutionStatusResultRecorder&) = delete;
  ScopedExecutionStatusResultRecorder& operator=(
      const ScopedExecutionStatusResultRecorder&) = delete;

  ~ScopedExecutionStatusResultRecorder() {
    base::UmaHistogramEnumeration(
        "OptimizationGuide.ModelExecutor.ExecutionStatus." +
            optimization_guide::GetStringNameForOptimizationTarget(
                optimization_target_),
        status_);
  }

  // Returns a pointer to the stored status so it can be filled in as an
  // out-parameter. The pointer is only valid while this recorder is alive.
  ExecutionStatus* mutable_status() { return &status_; }

  // Returns the status that would be recorded if the recorder were destroyed
  // now.
  ExecutionStatus status() const { return status_; }

  // Overwrites the status that will be recorded on destruction.
  void set_status(ExecutionStatus status) { status_ = status; }

 private:
  // The OptimizationTarget of the model being executed.
  const proto::OptimizationTarget optimization_target_;

  // The status reported on destruction.
  ExecutionStatus status_ = ExecutionStatus::kUnknown;
};

}  // namespace

// A ModelExecutor that executes tflite models with arbitrary
// input and output types. Note that callers will need to give an implementation
// of this class to a |ModelHandler|, whereas the
// handler is the actual class that calling code would own and call into.
//
// By default, the model file will be (re)loaded for every execution and then
// unloaded from memory after every execution (e.g.: "OnComplete"). This helps
// to keep memory usage of the browser process down, but does delay model
// execution by the time it takes to load the model (about 50ms in practice).
// See |SetShouldUnloadModelOnComplete| to override this behavior.
//
// Note that when built with the MediaPipe backend (non-default), task
// cancellation is not supported.
template <class OutputType,
          class InputType,
          // TODO(b/283522287): Remove this once all usage of TFLite Task
          // Support are replaced by MediaPipe.
          class ModelExecutionTaskType =
              tflite::task::core::BaseTaskApi<OutputType, InputType>>
class TFLiteModelExecutor : public ModelExecutor<OutputType, InputType> {
 public:
  // Constructs an executor with no watchdog: |watchdog_| starts out null, with
  // a deleter that has no task runner. A real watchdog may be installed later
  // by InitializeAndMoveToExecutionThread().
  TFLiteModelExecutor()
      : watchdog_(nullptr, base::OnTaskRunnerDeleter(nullptr)) {}

  // Must be destroyed on the sequence that |sequence_checker_| is bound to.
  // Releases the model file held in |model_fb_|.
  ~TFLiteModelExecutor() override {
    DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);

    // Unload the model. Do not use `UnloadModel` since it may be overridden by
    // a subclass and hence not available from this destructor.
    model_fb_.reset();
  }

  // Binds this executor to its optimization target and task runners. Must be
  // invoked on the sequence that constructed |this|; once it returns, |this|
  // may only be used from the |execution_task_runner| thread/sequence.
  //
  // When the model execution watchdog feature is enabled, a watchdog is also
  // created, using |model_inference_timeout| when provided and the feature's
  // default timeout otherwise.
  void InitializeAndMoveToExecutionThread(
      std::optional<base::TimeDelta> model_inference_timeout,
      proto::OptimizationTarget optimization_target,
      scoped_refptr<base::SequencedTaskRunner> execution_task_runner,
      scoped_refptr<base::SequencedTaskRunner> reply_task_runner) override {
    DCHECK(!execution_task_runner_);
    DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
    DCHECK_NE(optimization_target,
              proto::OptimizationTarget::OPTIMIZATION_TARGET_UNKNOWN);

    // Later sequence checks are expected to happen on the execution sequence
    // rather than the constructing one.
    DETACH_FROM_SEQUENCE(sequence_checker_);

    optimization_target_ = optimization_target;
    execution_task_runner_ = execution_task_runner;
    reply_task_runner_ = reply_task_runner;
    model_loading_task_runner_ = base::ThreadPool::CreateSequencedTaskRunner(
        {base::MayBlock(), base::TaskPriority::BEST_EFFORT});

    if (!features::IsModelExecutionWatchdogEnabled()) {
      return;
    }

    using WatchdogPtr = std::unique_ptr<ModelExecutionTimeoutWatchdog,
                                        base::OnTaskRunnerDeleter>;

    // The watchdog runs its task on |watchdog_runner| and must also be deleted
    // there, which guarantees that pending tasks can safely be executed.
    scoped_refptr<base::SequencedTaskRunner> watchdog_runner =
        base::ThreadPool::CreateSequencedTaskRunner({base::MayBlock()});
    const base::TimeDelta timeout = model_inference_timeout.value_or(
        features::ModelExecutionWatchdogDefaultTimeout());
    watchdog_ = WatchdogPtr(
        new ModelExecutionTimeoutWatchdog(watchdog_runner,
                                          optimization_target_, timeout),
        base::OnTaskRunnerDeleter(watchdog_runner));
  }

  // Registers the latest model file location. Any currently loaded model is
  // unloaded first. An empty |file_path| means the model has been removed, in
  // which case the stored path is cleared and nothing else happens. Otherwise
  // the path is remembered and, when `should_preload_model_` is set, the model
  // is loaded into memory right away.
  void UpdateModelFile(
      base::optional_ref<const base::FilePath> file_path) override {
    DCHECK(execution_task_runner_ &&
           execution_task_runner_->RunsTasksInCurrentSequence());
    DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);

    UnloadModel();
    DCHECK(!loaded_model_);
    DCHECK(!model_fb_);

    if (!file_path.has_value()) {
      // The model has been removed.
      model_file_path_.reset();
      return;
    }
    model_file_path_ = *file_path;

    // crbug/1257189: Histogram enums can't use dynamically created histogram
    // names, so factory create the local histogram (used in testing).
    const auto updated_histogram_name =
        "OptimizationGuide.ModelExecutor.ModelFileUpdated." +
        optimization_guide::GetStringNameForOptimizationTarget(
            optimization_target_);
    base::BooleanHistogram::FactoryGet(updated_histogram_name,
                                       base::Histogram::kNoFlags)
        ->Add(true);

    if (!should_preload_model_) {
      return;
    }
    LoadModelFile(base::DoNothing());
  }

  // Calling this method allows the default model loading/unloading behavior to
  // be overridden. Setting this to false will cause the model to remain loaded
  // after a model execution (e.g.: "OnComplete"), until |UnloadModel| is
  // called. The default is true, i.e. the model is unloaded after every
  // execution (see class comment).
  //
  // Note that keeping the model in memory for a long duration may be detected
  // as a memory leak in Chrome, and will always increase the private or shared
  // memory used by the browser by the size of the model file and the
  // constructed TFLite graph.
  void SetShouldUnloadModelOnComplete(
      bool should_unload_model_on_complete) override {
    DCHECK(execution_task_runner_->RunsTasksInCurrentSequence());
    DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
    should_unload_model_on_complete_ = should_unload_model_on_complete;
  }

  // Calling this method allows the default model preloading behavior to
  // be overridden. Setting this to true will cause the model to be loaded as
  // soon as its file path is available (the default is false). Callers may
  // also need to call `SetShouldUnloadModelOnComplete(false)` to keep the
  // model in memory for the lifetime of the entire browsing session, since
  // otherwise it is unloaded again after each execution.
  //
  // Note that keeping the model in memory for a long duration may be detected
  // as a memory leak in Chrome, and will always increase the private or shared
  // memory used by the browser by the size of the model file and the
  // constructed TFLite graph.
  void SetShouldPreloadModel(bool should_preload_model) override {
    DCHECK(execution_task_runner_->RunsTasksInCurrentSequence());
    DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
    should_preload_model_ = should_preload_model;
  }

  // Clears the loaded model from memory if it is loaded. Safe to call when the
  // model is already unloaded, and becomes a no-op. Must be called on the
  // execution sequence. Emits a trace event tagged with the optimization
  // target.
  void UnloadModel() override {
    TRACE_EVENT1("browser", "OptGuideModelExecutor::UnloadModel",
                 "OptimizationTarget",
                 optimization_guide::GetStringNameForOptimizationTarget(
                     optimization_target_));
    DCHECK(execution_task_runner_->RunsTasksInCurrentSequence());
    DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);

    // The execution task is released before the file it was built from.
    // NOTE(review): presumably the order matters so the task never outlives
    // its backing file - confirm before reordering.
    loaded_model_.reset();
    model_fb_.reset();
  }

  // Completion callback for a single execution. Receives the model output, or
  // std::nullopt when no output was produced (e.g. the model was not loaded).
  using ExecutionCallback =
      base::OnceCallback<void(const std::optional<OutputType>&)>;
  // Completion callback for a batch execution. Receives one entry per input,
  // in input order.
  using BatchExecutionCallback =
      base::OnceCallback<void(const std::vector<std::optional<OutputType>>&)>;

  // Executes the model on a single |input| by delegating to
  // SendForBatchExecution() with a one-element batch. When complete,
  // |callback_on_complete| will be run via |reply_task_runner_| with the sole
  // output of that batch.
  void SendForExecution(ExecutionCallback callback_on_complete,
                        base::TimeTicks start_time,
                        InputType input) override {
    SendForBatchExecution(
        base::BindOnce(
            // Unwraps the one-element batch result for the single-input
            // callback.
            [](ExecutionCallback single_callback,
               const std::vector<std::optional<OutputType>>& batch_outputs) {
              CHECK_EQ(batch_outputs.size(), 1U);
              std::move(single_callback).Run(batch_outputs.front());
            },
            std::move(callback_on_complete)),
        start_time, {input});
  }

  // Kicks off execution of the model over |inputs|. The time elapsed since
  // |start_time| is recorded as the task scheduling latency. When complete,
  // |callback_on_complete| will be run via |reply_task_runner_| with the
  // outputs of the model.
  void SendForBatchExecution(
      BatchExecutionCallback callback_on_complete,
      base::TimeTicks start_time,
      ModelExecutor<OutputType, InputType>::ConstRefInputVector inputs)
      override {
    DCHECK(execution_task_runner_->RunsTasksInCurrentSequence());
    DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
    DCHECK(reply_task_runner_);

    const base::TimeTicks dispatch_time = base::TimeTicks::Now();
    const auto latency_histogram_name =
        "OptimizationGuide.ModelExecutor.TaskSchedulingLatency." +
        optimization_guide::GetStringNameForOptimizationTarget(
            optimization_target_);
    base::UmaHistogramMediumTimes(latency_histogram_name,
                                  dispatch_time - start_time);

    // If the model is not loaded yet, its file is first loaded on the
    // background thread; the batch is then executed on the execution thread.
    LoadModelFileAndBatchExecute(std::move(callback_on_complete), inputs);
  }

  // Runs the model over |inputs| synchronously and returns one output per
  // input. The model must already be loaded: this call never loads it. If it
  // is not loaded, every output is std::nullopt and
  // `ExecutionStatus::kErrorModelFileNotAvailable` is recorded once per input.
  std::vector<std::optional<OutputType>> SendForBatchExecutionSync(
      ModelExecutor<OutputType, InputType>::ConstRefInputVector inputs)
      override {
    DCHECK(execution_task_runner_->RunsTasksInCurrentSequence());
    DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);

    std::vector<std::optional<OutputType>> results;
    results.reserve(inputs.size());

    if (loaded_model_) {
      BatchExecuteLoadedModel(inputs, &results);
      OnExecutionComplete();
      return results;
    }

    // The model isn't loaded yet, so return null results. A successful batch
    // records one status sample per input, so the failure bucket is likewise
    // incremented |inputs.size()| times to keep metrics sane.
    for (size_t remaining = inputs.size(); remaining > 0; --remaining) {
      ScopedExecutionStatusResultRecorder failure_recorder(
          optimization_target_);
      failure_recorder.set_status(
          ExecutionStatus::kErrorModelFileNotAvailable);
      results.push_back(std::nullopt);
    }
    return results;
  }

  // Returns a weak pointer to |this| vended by
  // |execution_sequence_weak_ptr_factory_|.
  //
  // IMPORTANT: These WeakPointers must only be dereferenced on the
  // |execution_task_runner| thread.
  base::WeakPtr<TFLiteModelExecutor> GetWeakPtrForExecutionThread() {
    return execution_sequence_weak_ptr_factory_.GetWeakPtr();
  }

  // Not copyable or copy-assignable.
  TFLiteModelExecutor(const TFLiteModelExecutor&) = delete;
  TFLiteModelExecutor& operator=(const TFLiteModelExecutor&) = delete;

 protected:
  // The TFLite Task Support base task type for this executor's input/output
  // types. Note that this is always the BaseTaskApi instantiation, regardless
  // of the |ModelExecutionTaskType| template argument.
  using ModelExecutionTask =
      tflite::task::core::BaseTaskApi<OutputType, InputType>;

  // Executes the model using |execution_task| on |args|, returning the model
  // output and setting |out_status| with the status of the execution attempt.
  // The caller passes the currently loaded model task as |execution_task| and
  // DCHECKs afterwards that |out_status| is no longer
  // `ExecutionStatus::kUnknown`, so implementations must always set it.
  virtual std::optional<OutputType> Execute(
      ModelExecutionTaskType* execution_task,
      ExecutionStatus* out_status,
      InputType args) = 0;

  // Builds a model execution task using |model_file|. On error, the returned
  // `ExecutionStatus` will never be `ExecutionStatus::kSuccess`. Called on the
  // execution sequence once the model file has been opened; the resulting task
  // becomes the loaded model.
  virtual base::expected<std::unique_ptr<ModelExecutionTaskType>,
                         ExecutionStatus>
  BuildModelExecutionTask(base::File& model_file) = 0;

 private:
  // An owned model file whose deletion is posted to a task runner rather than
  // performed inline.
  using FileDeleteOnTaskRunner =
      std::unique_ptr<base::File, base::OnTaskRunnerDeleter>;

  // Returns an empty (null) file handle whose deleter has no task runner. Used
  // as the initial value of |model_fb_|.
  static FileDeleteOnTaskRunner NullFileDeleteOnTaskRunner() {
    return {nullptr, base::OnTaskRunnerDeleter(nullptr)};
  }

  // Opens the model file on the background model-loading sequence, then hands
  // the result to OnModelFileLoadedInMemory() (via a weak pointer), which
  // eventually runs |model_loaded_callback| with the resulting
  // `ExecutionStatus`. Any currently loaded model is unloaded first. Must be
  // called on the model execution thread.
  void LoadModelFile(
      base::OnceCallback<void(ExecutionStatus)> model_loaded_callback) {
    TRACE_EVENT1("browser", "OptGuideModelExecutor::LoadModelFile",
                 "OptimizationTarget",
                 optimization_guide::GetStringNameForOptimizationTarget(
                     optimization_target_));
    DCHECK(execution_task_runner_->RunsTasksInCurrentSequence());
    DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);

    UnloadModel();

    // Records whether a model file path is known at the time of the load
    // attempt.
    base::UmaHistogramBoolean(
        "OptimizationGuide.ModelExecutor.ModelAvailableToLoad." +
            GetStringNameForOptimizationTarget(optimization_target_),
        !!model_file_path_);

    // TODO(b/298673103): Multiple calls to LoadModelFile may trigger this
    // PostTask multiple times.

    // Run the slower model loading file I/O task on the background thread to
    // avoid blocking the main thread, e.g., the UI thread.
    model_loading_task_runner_->PostTaskAndReplyWithResult(
        FROM_HERE,
        // Anonymous model file loading function to be called on the background
        // thread. Returns the opened model file, or an `ExecutionStatus` error
        // when no file path is known or the file could not be opened.
        base::BindOnce(
            [](const std::optional<base::FilePath> model_file_path,
               proto::OptimizationTarget optimization_target,
               scoped_refptr<base::SequencedTaskRunner>
                   model_loading_task_runner)
                -> base::expected<FileDeleteOnTaskRunner, ExecutionStatus> {
              base::TimeTicks loading_start_time = base::TimeTicks::Now();
              if (!model_file_path) {
                return base::unexpected(
                    ExecutionStatus::kErrorModelFileNotAvailable);
              }

              // The file is owned by a pointer whose deleter posts the
              // deletion to the model loading task runner.
              FileDeleteOnTaskRunner model_fb(
                  new base::File(*model_file_path,
                                 base::File::FLAG_OPEN | base::File::FLAG_READ),
                  base::OnTaskRunnerDeleter(
                      std::move(model_loading_task_runner)));
              if (!model_fb->IsValid()) {
                return base::unexpected(
                    ExecutionStatus::kErrorModelFileNotValid);
              }

              // We only want to record successful loading times.
              base::UmaHistogramTimes(
                  "OptimizationGuide.ModelExecutor.ModelLoadingDuration2." +
                      optimization_guide::GetStringNameForOptimizationTarget(
                          optimization_target),
                  base::TimeTicks::Now() - loading_start_time);

              return std::move(model_fb);
            },
            model_file_path_, optimization_target_, model_loading_task_runner_),
        // Reply: bound to a weak pointer, so it is dropped if |this| has been
        // destroyed in the meantime.
        base::BindOnce(&TFLiteModelExecutor::OnModelFileLoadedInMemory,
                       GetWeakPtrForExecutionThread(),
                       std::move(model_loaded_callback)));
  }

  // Reply half of LoadModelFile(): receives the result of opening the model
  // file. If opening failed, forwards the error to |model_loaded_callback|.
  // Otherwise takes ownership of the file, builds the model execution task
  // from it, and runs |model_loaded_callback| with
  // `ExecutionStatus::kSuccess` or the build error.
  void OnModelFileLoadedInMemory(
      base::OnceCallback<void(ExecutionStatus)> model_loaded_callback,
      base::expected<FileDeleteOnTaskRunner, ExecutionStatus> model_fb) {
    DCHECK(execution_task_runner_->RunsTasksInCurrentSequence());
    DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);

    // |model_fb_| may be replaced below; unloading first makes sure the old
    // file is deleted on a blockable thread.
    UnloadModel();

    if (!model_fb.has_value()) {
      std::move(model_loaded_callback).Run(model_fb.error());
      return;
    }
    model_fb_ = std::move(model_fb.value());

    auto task_or_error = BuildModelExecutionTask(*model_fb_);
    if (task_or_error.has_value()) {
      loaded_model_ = std::move(task_or_error.value());
    }

    // Local histogram used in integration testing.
    const auto loaded_histogram_name =
        "OptimizationGuide.ModelExecutor.ModelLoadedSuccessfully." +
        optimization_guide::GetStringNameForOptimizationTarget(
            optimization_target_);
    base::BooleanHistogram::FactoryGet(loaded_histogram_name,
                                       base::Histogram::kNoFlags)
        ->Add(loaded_model_ != nullptr);

    const ExecutionStatus load_status =
        task_or_error.error_or(ExecutionStatus::kSuccess);
    std::move(model_loaded_callback).Run(load_status);
  }

  // Executes the batch right away when a model is already loaded. Otherwise
  // loads the model file on the background thread first and executes the batch
  // on the model execution thread once loading has finished.
  void LoadModelFileAndBatchExecute(
      BatchExecutionCallback callback_on_complete,
      ModelExecutor<OutputType, InputType>::ConstRefInputVector inputs) {
    DCHECK(execution_task_runner_->RunsTasksInCurrentSequence());
    DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);

    if (loaded_model_) {
      BatchExecuteLoadedModelAndRunCallback(std::move(callback_on_complete),
                                            inputs, ExecutionStatus::kSuccess);
      return;
    }

    // The load status reported by LoadModelFile() becomes the final argument
    // of the bound method. Binding through a weak pointer drops the execution
    // if |this| is destroyed while the load is in flight.
    base::OnceCallback<void(ExecutionStatus)> execute_after_load =
        base::BindOnce(
            &TFLiteModelExecutor::BatchExecuteLoadedModelAndRunCallback,
            GetWeakPtrForExecutionThread(), std::move(callback_on_complete),
            inputs);
    LoadModelFile(std::move(execute_after_load));
  }

  // Batch executes the loaded model: runs Execute() once per element of
  // |inputs| and appends each result to |outputs| in input order. Requires
  // |loaded_model_| to be set.
  //
  // For every input, an execution status sample and the latency histograms are
  // recorded, and the watchdog (when present) is armed for the duration of the
  // execution. The time since the previous batch is recorded once per call,
  // starting with the second call.
  void BatchExecuteLoadedModel(
      ModelExecutor<OutputType, InputType>::ConstRefInputVector inputs,
      std::vector<std::optional<OutputType>>* outputs) {
    DCHECK(execution_task_runner_->RunsTasksInCurrentSequence());
    DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
    DCHECK(loaded_model_);

    // The target name is loop-invariant, so build it once instead of once per
    // histogram per input.
    const auto target_name =
        optimization_guide::GetStringNameForOptimizationTarget(
            optimization_target_);

    if (last_execution_time_) {
      // The max of this histogram is 3m since only the distribution and count
      // of smaller values is important.
      base::UmaHistogramMediumTimes(
          "OptimizationGuide.ModelExecutor.TimeSincePreviousRun." + target_name,
          base::TimeTicks::Now() - *last_execution_time_);
    }
    last_execution_time_ = base::TimeTicks::Now();

    for (const InputType& input : inputs) {
      // Records the execution status when this iteration ends.
      ScopedExecutionStatusResultRecorder status_recorder(optimization_target_);
      // IMPORTANT: Once the arm method is called, disarm must be called when
      // the model execution finishes. Do NOT early-return in this next block.
      if (watchdog_) {
        watchdog_->ArmWithTask(MakeCancelClosure());
      }
      {
        TRACE_EVENT1("browser", "OptGuideModelExecutor::Execute",
                     "OptimizationTarget", target_name);
        base::ElapsedThreadTimer execution_timer;
        base::ElapsedTimer elapsed_timer;
        std::optional<OutputType> output = Execute(
            loaded_model_.get(), status_recorder.mutable_status(), input);
        DCHECK_NE(status_recorder.status(), ExecutionStatus::kUnknown);
        // |output| is not used again, so move it to avoid copying a
        // potentially large result.
        outputs->push_back(std::move(output));

        // Read each timer once so that the two thread-time histograms report
        // the same measurement.
        const base::TimeDelta wall_time = elapsed_timer.Elapsed();
        const base::TimeDelta thread_time = execution_timer.Elapsed();

        // The max of this histogram is 1 hour because we want to understand
        // tail behavior and catch long running model executions.
        base::UmaHistogramLongTimes(
            "OptimizationGuide.ModelExecutor.ExecutionLatency." + target_name,
            wall_time);
        base::UmaHistogramLongTimes(
            "OptimizationGuide.ModelExecutor.ExecutionThreadTime." +
                target_name,
            thread_time);
        base::UmaHistogramMicrosecondsTimes(
            "OptimizationGuide.ModelExecutor.ExecutionThreadTimeMicroseconds." +
                target_name,
            thread_time);
      }
      if (watchdog_) {
        watchdog_->DisarmOnExecutionComplete();
      }
    }
  }

  // Batch executes the loaded model and posts |callback_on_complete| with the
  // outputs to the reply thread. Unloads the model afterwards if needed.
  //
  // If no model is loaded (e.g. loading failed), the callback receives one
  // std::nullopt per input and |execution_status| is recorded once per input
  // instead; nothing is executed or unloaded in that case.
  void BatchExecuteLoadedModelAndRunCallback(
      BatchExecutionCallback callback_on_complete,
      ModelExecutor<OutputType, InputType>::ConstRefInputVector inputs,
      ExecutionStatus execution_status) {
    DCHECK(execution_task_runner_->RunsTasksInCurrentSequence());
    DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
    DCHECK(callback_on_complete);

    std::vector<std::optional<OutputType>> outputs;
    outputs.reserve(inputs.size());

    const bool model_is_loaded = loaded_model_ != nullptr;
    if (model_is_loaded) {
      BatchExecuteLoadedModel(inputs, &outputs);
    } else {
      for (size_t i = 0; i < inputs.size(); i++) {
        outputs.push_back(std::nullopt);
        // If the model fails to load in a batch context, this status would not
        // get recorded the same number of times as it would in success. Thus,
        // increment the bucket |inputs.size()| number of times to keep metrics
        // sane.
        ScopedExecutionStatusResultRecorder status_recorder(
            optimization_target_);
        status_recorder.set_status(execution_status);
      }
    }

    // |outputs| is not used again, so move it into the bound reply rather than
    // copying the whole batch of results.
    reply_task_runner_->PostTask(
        FROM_HERE, base::BindOnce(std::move(callback_on_complete),
                                  std::move(outputs)));

    if (model_is_loaded) {
      OnExecutionComplete();
    }
  }

  // Called after the loaded model has been executed. Unloads the model when
  // |should_unload_model_on_complete_| is set; otherwise leaves it loaded.
  void OnExecutionComplete() {
    DCHECK(execution_task_runner_->RunsTasksInCurrentSequence());
    DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
    if (should_unload_model_on_complete_) {
      UnloadModel();
    }
  }

  // Returns the closure that is handed to the watchdog when it is armed. When
  // built with the MediaPipe library the closure does nothing; otherwise it
  // calls Cancel() on the currently loaded model task.
  base::OnceClosure MakeCancelClosure() {
#if BUILDFLAG(BUILD_WITH_MEDIAPIPE_LIB)
    return base::DoNothing();
#else
    DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
    // |base::Unretained| is safe here since the watchdog itself guarantees the
    // lifetime of the stored pointer will not extend beyond when it is
    // disarmed.
    return base::BindOnce(&ModelExecutionTask::Cancel,
                          base::Unretained(loaded_model_.get()));
#endif
  }

  // The optimization target this executor serves. Set in
  // |InitializeAndMoveToExecutionThread()| and used to suffix histogram names.
  proto::OptimizationTarget optimization_target_ =
      proto::OptimizationTarget::OPTIMIZATION_TARGET_UNKNOWN;

  // Whether the model is unloaded after every execution. See
  // |SetShouldUnloadModelOnComplete()|.
  bool should_unload_model_on_complete_ = true;

  // Whether the model is loaded as soon as its file path is provided. See
  // |SetShouldPreloadModel()|.
  bool should_preload_model_ = false;

  // Null unless the model execution watchdog feature was enabled when
  // |InitializeAndMoveToExecutionThread()| ran. Deleted via
  // `OnTaskRunnerDeleter` on the watchdog's own sequence.
  std::unique_ptr<ModelExecutionTimeoutWatchdog, base::OnTaskRunnerDeleter>
      watchdog_;

  // Main thread for model execution. For synchronous model execution, this
  // needs to be the same caller thread.
  scoped_refptr<base::SequencedTaskRunner> execution_task_runner_;

  // Arbitrary thread for running reply tasks.
  scoped_refptr<base::SequencedTaskRunner> reply_task_runner_;

  // Background thread for model loading file I/O.
  scoped_refptr<base::SequencedTaskRunner> model_loading_task_runner_;

  // The time that the model was last executed. Logged in metrics for the second
  // and following runs.
  std::optional<base::TimeTicks> last_execution_time_
      GUARDED_BY_CONTEXT(sequence_checker_);

  // The model file path to be loaded. May be nullopt if no model has been
  // downloaded yet.
  std::optional<base::FilePath> model_file_path_
      GUARDED_BY_CONTEXT(sequence_checker_);

  // Note on lifetimes: |loaded_model_| and |model_fb_| are both set in
  // |OnModelFileLoadedInMemory()| (the reply to |LoadModelFile()|) and both
  // destroyed in |UnloadModel()|. If building the execution task fails,
  // |model_fb_| can be set while |loaded_model_| stays null.

  // The execution task built from |model_fb_|; null while no model is loaded.
  std::unique_ptr<ModelExecutionTaskType> loaded_model_
      GUARDED_BY_CONTEXT(sequence_checker_);

  // The opened model file. Non-null only between a successful file open and
  // the next |UnloadModel()|. `OnTaskRunnerDeleter` is used to ensure that
  // destruction occurs on a sequence that allows blocking, since it involves
  // closing a file handle.
  FileDeleteOnTaskRunner model_fb_ GUARDED_BY_CONTEXT(sequence_checker_) =
      NullFileDeleteOnTaskRunner();

  SEQUENCE_CHECKER(sequence_checker_);

  // Vends the weak pointers returned by |GetWeakPtrForExecutionThread()|.
  base::WeakPtrFactory<TFLiteModelExecutor>
      execution_sequence_weak_ptr_factory_{this};
};

}  // namespace optimization_guide

#endif  // COMPONENTS_OPTIMIZATION_GUIDE_CORE_TFLITE_MODEL_EXECUTOR_H_