File: optical_character_recognizer.h

package info (click to toggle)
chromium 139.0.7258.127-1
  • links: PTS, VCS
  • area: main
  • in suites:
  • size: 6,122,068 kB
  • sloc: cpp: 35,100,771; ansic: 7,163,530; javascript: 4,103,002; python: 1,436,920; asm: 946,517; xml: 746,709; pascal: 187,653; perl: 88,691; sh: 88,436; objc: 79,953; sql: 51,488; cs: 44,583; fortran: 24,137; makefile: 22,147; tcl: 15,277; php: 13,980; yacc: 8,984; ruby: 7,485; awk: 3,720; lisp: 3,096; lex: 1,327; ada: 727; jsp: 228; sed: 36
file content (162 lines) | stat: -rw-r--r-- 6,429 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
// Copyright 2024 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef CHROME_BROWSER_SCREEN_AI_PUBLIC_OPTICAL_CHARACTER_RECOGNIZER_H_
#define CHROME_BROWSER_SCREEN_AI_PUBLIC_OPTICAL_CHARACTER_RECOGNIZER_H_

#include <optional>

#include "base/memory/raw_ptr.h"
#include "base/memory/ref_counted_delete_on_sequence.h"
#include "base/memory/weak_ptr.h"
#include "base/scoped_observation.h"
#include "base/sequence_checker.h"
#include "base/threading/sequence_bound.h"
#include "chrome/browser/profiles/profile.h"
#include "chrome/browser/profiles/profile_observer.h"
#include "mojo/public/cpp/bindings/remote.h"
#include "services/screen_ai/public/mojom/screen_ai_service.mojom.h"

// Forward declaration of the receiver type used by
// `OpticalCharacterRecognizer::sequence_bound_receiver_`. Its definition is not
// part of this header.
// NOTE(review): an unnamed namespace in a header gives every including
// translation unit its own distinct `SequenceBoundReceiver` type. Presumably
// the definition lives in the matching .cc file — confirm before relocating.
namespace {
class SequenceBoundReceiver;
}

namespace screen_ai {

// A simple class that initializes a connection to the OCR service and performs
// OCR through it.
// See OCR section in `chrome/browser/screen_ai/README.md` for more info.
//
// Instances are ref-counted and are created through the static `Create*`
// factory functions below; the constructor and destructor are not public.
class OpticalCharacterRecognizer
    : public ProfileObserver,
      public base::RefCountedDeleteOnSequence<OpticalCharacterRecognizer> {
 public:
  // Callback type invoked when the connection to the OCR service is lost.
  using OcrDisconnectedCallback = base::RepeatingCallback<void()>;

  // Creates OCR using ScreenAI service instance for `profile`. If needed,
  // triggers download and initialization of the component. Calls
  // `status_callback` asynchronously after the OCR has been created and the
  // service initialization status is known.
  static scoped_refptr<screen_ai::OpticalCharacterRecognizer>
  CreateWithStatusCallback(Profile* profile,
                           mojom::OcrClientType client_type,
                           base::OnceCallback<void(bool)> status_callback);

  // Creates OCR using ScreenAI service instance for `profile`. If needed,
  // triggers download and initialization of the component.
  static scoped_refptr<screen_ai::OpticalCharacterRecognizer> Create(
      Profile* profile,
      mojom::OcrClientType client_type);

  // Creates OCR for testing. The object will not be connected to ScreenAI
  // service and always returns empty results.
  static scoped_refptr<screen_ai::OpticalCharacterRecognizer>
  CreateForTesting();

  // Not copyable.
  OpticalCharacterRecognizer(const OpticalCharacterRecognizer&) = delete;
  OpticalCharacterRecognizer& operator=(const OpticalCharacterRecognizer&) =
      delete;

  // ProfileObserver::
  void OnProfileWillBeDestroyed(Profile* profile) override;

  // Returns true if OCR service is ready. This state will be preserved if the
  // connection to the OCR service is reset due to being idle or if the service
  // is shut down. It is expected that the connection would be revivable when
  // needed.
  // Returns false while `ready_` has not been assigned a value yet.
  bool is_ready() const { return ready_.value_or(false); }

  // Returns true only if `screen_ai_annotator_` exists, is bound, and reports
  // that it is connected.
  bool is_connected() const {
    return screen_ai_annotator_ && screen_ai_annotator_->is_bound() &&
           screen_ai_annotator_->is_connected();
  }

  // Returns true once `ready_` holds a value, regardless of whether that value
  // is true or false.
  bool StatusAvailableForTesting() const { return ready_.has_value(); }

  // Connects to the OCR service if not already connected.
  void MaybeConnectToOcrService();

  // Performs OCR on the given image and returns the results as a
  // `VisualAnnotation` struct. Returns empty results in the callback if the
  // service is not ready yet.
  virtual void PerformOCR(
      const SkBitmap& image,
      base::OnceCallback<void(mojom::VisualAnnotationPtr)> callback);

#if BUILDFLAG(IS_CHROMEOS)
  // Performs OCR on the given image and returns the results as an accessibility
  // tree update. Returns empty results in the callback if the service is not
  // ready yet.
  virtual void PerformOCR(
      const SkBitmap& image,
      base::OnceCallback<void(const ui::AXTreeUpdate& tree_update)> callback);
#endif  // BUILDFLAG(IS_CHROMEOS)

  // Ensures all posted tasks are completed in tests. The default
  // implementation does nothing; it is virtual so that subclasses can override
  // it.
  virtual void FlushForTesting() {}

  // Disconnects from ScreenAI service. This can also be used to simulate idle
  // timeout or service shutdown/crash for testing.
  void DisconnectAnnotator();

  // Sets the callback for the disconnection of `screen_ai_annotator_`. It will
  // be triggered from the UI thread, and it's the responsibility of the client
  // to execute it on the right thread.
  void SetDisconnectedCallback(OcrDisconnectedCallback callback);

  // Returns the maximum dimension for which images are processed without
  // downsampling. This value is not expected to change after initialization of
  // the service and is expected to be non-zero. Returns 0 in the callback if
  // the service is not ready yet.
  void GetMaxImageDimension(base::OnceCallback<void(uint32_t)> callback);

 protected:
  // Construction and destruction are restricted to subclasses and to the
  // friends declared below.
  explicit OpticalCharacterRecognizer(Profile* profile,
                                      mojom::OcrClientType client_type);
  ~OpticalCharacterRecognizer() override;

  // OCR Service is ready to use. The value is set after initialization has
  // finished successfully or with failure. It has no value until then.
  std::optional<bool> ready_;

 private:
  // These helpers need access to the non-public constructor and destructor.
  friend class base::RefCountedDeleteOnSequence<OpticalCharacterRecognizer>;
  friend class base::DeleteHelper<OpticalCharacterRecognizer>;
  template <typename T, typename... Args>
  friend scoped_refptr<T> base::MakeRefCounted(Args&&... args);

  // Starts initialization; `status_callback` receives the initialization
  // status.
  void Initialize(base::OnceCallback<void(bool)> status_callback);

  // `status_callback` will receive a copy of `successful`.
  void OnOCRInitializationCallback(
      base::OnceCallback<void(bool)> status_callback,
      bool successful);

  // Handler for the disconnection of `screen_ai_annotator_`.
  void OnOcrDisconnected();

  // Is initialized in the constructor and is cleared if profile gets destroyed
  // while this object still exists.
  raw_ptr<Profile> profile_;

  // The client type passed to the constructor.
  mojom::OcrClientType client_type_;

  // Called when the `screen_ai_annotator_` is disconnected.
  OcrDisconnectedCallback ocr_disconnected_callback_;

  // For calls from another sequence, this object keeps a pointer to the task
  // scheduler of the other sequence to return the result.
  // Each `OpticalCharacterRecognizer` object can be used for at most one other
  // sequence.
  std::unique_ptr<base::SequenceBound<SequenceBoundReceiver>>
      sequence_bound_receiver_;

  // Remote end of the connection to the OCR service. May be null; see
  // `is_connected()`.
  std::unique_ptr<mojo::Remote<mojom::ScreenAIAnnotator>> screen_ai_annotator_;

  // Registers `this` as a `ProfileObserver`; see `OnProfileWillBeDestroyed()`.
  base::ScopedObservation<Profile, ProfileObserver> profile_observer_{this};

  SEQUENCE_CHECKER(sequence_checker_);

  // Declared last so that it is destroyed before every other member.
  base::WeakPtrFactory<OpticalCharacterRecognizer> weak_ptr_factory_{this};
};

}  // namespace screen_ai

#endif  // CHROME_BROWSER_SCREEN_AI_PUBLIC_OPTICAL_CHARACTER_RECOGNIZER_H_