File: pdf_ocr_helper.h

package info (click to toggle)
chromium 138.0.7204.183-1~deb12u1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm-proposed-updates
  • size: 6,080,960 kB
  • sloc: cpp: 34,937,079; ansic: 7,176,967; javascript: 4,110,704; python: 1,419,954; asm: 946,768; xml: 739,971; pascal: 187,324; sh: 89,623; perl: 88,663; objc: 79,944; sql: 50,304; cs: 41,786; fortran: 24,137; makefile: 21,811; php: 13,980; tcl: 13,166; yacc: 8,925; ruby: 7,485; awk: 3,720; lisp: 3,096; lex: 1,327; ada: 727; jsp: 228; sed: 36
file content (144 lines) | stat: -rw-r--r-- 5,728 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
// Copyright 2024 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef COMPONENTS_PDF_RENDERER_PDF_OCR_HELPER_H_
#define COMPONENTS_PDF_RENDERER_PDF_OCR_HELPER_H_

#include <vector>

#include "base/containers/queue.h"
#include "base/memory/weak_ptr.h"
#include "base/sequence_checker.h"
#include "content/public/renderer/render_frame_observer.h"
#include "mojo/public/cpp/bindings/remote.h"
#include "pdf/accessibility_structs.h"
#include "services/screen_ai/public/mojom/screen_ai_service.mojom.h"
#include "ui/accessibility/ax_node_id_forward.h"
#include "ui/accessibility/ax_tree_update.h"

// Forward declarations. This header only refers to these types through a
// pointer (`PdfAccessibilityImageFetcher`) or a reference (`RenderFrame`), so
// their full definitions are not needed here.
namespace chrome_pdf {
class PdfAccessibilityImageFetcher;
}  // namespace chrome_pdf

namespace content {
class RenderFrame;
}  // namespace content

namespace pdf {

// Status values for a PDF OCR request.
//
// These values are persisted to logs. Entries should not be renumbered and
// numeric values should never be reused.
// LINT.IfChange(PdfOcrRequestStatus)
enum class PdfOcrRequestStatus {
  // NOTE(review): presumably recorded when OCR is requested -- confirm
  // against the recording site in the .cc file.
  kRequested = 0,
  // NOTE(review): presumably recorded once OCR has actually been carried
  // out -- confirm against the recording site in the .cc file.
  kPerformed = 1,
  // Alias of the highest-valued entry. Keep it pointing at the last entry
  // when a new value is appended.
  kMaxValue = kPerformed,
};
// LINT.ThenChange(/tools/metrics/histograms/metadata/accessibility/enums.xml:PdfOcrRequestStatus)

// Used for storing OCR requests either before performing an OCR job, or after
// the results have been received. This is for scheduling the work in another
// task in batches in order to unblock the user from reading a partially
// OCRed PDF, and in order to avoid sending all the images to the OCR Helper
// at once, in case the PDF is closed halfway through the OCR process.
//
// All identifying fields are `const`, so an instance can be copied (see the
// copy constructor below) but not assigned to after construction.
struct PdfOcrRequest {
  // Takes one argument for each `const` field below. `is_last_on_page` and
  // `image_pixel_size` are not constructor parameters; they are plain mutable
  // fields.
  PdfOcrRequest(const ui::AXNodeID& image_node_id,
                const chrome_pdf::AccessibilityImageInfo& image,
                const ui::AXNodeID& root_node_id,
                const ui::AXNodeID& parent_node_id,
                const ui::AXNodeID& page_node_id,
                uint32_t page_index);
  // User-declared copy constructor (defined out of line). Declaring it
  // suppresses the implicit move constructor, so moves of this type fall back
  // to copies.
  PdfOcrRequest(const PdfOcrRequest& other);

  // Accessibility node ID of the image this request is for.
  const ui::AXNodeID image_node_id;
  // Accessibility information describing the image.
  const chrome_pdf::AccessibilityImageInfo image;
  // Accessibility node IDs that locate the image in the tree.
  // NOTE(review): going by the names these are the tree root, the image's
  // parent and the containing page node -- confirm against the caller.
  const ui::AXNodeID root_node_id;
  const ui::AXNodeID parent_node_id;
  const ui::AXNodeID page_node_id;
  // Index of the page the image belongs to.
  // NOTE(review): presumably 0-based -- confirm against the caller.
  const uint32_t page_index;
  // This boolean indicates which request corresponds to the last image on
  // each page.
  bool is_last_on_page = false;

  // This field is set after the image is extracted from PDF.
  // NOTE(review): `gfx::SizeF` is not directly included by this header
  // (ui/gfx/geometry/size_f.h); it presumably arrives via another include.
  gfx::SizeF image_pixel_size;
};

// Manages the connection to the OCR Service via Mojo, and ensures that
// requests are sent in order and that responses are batched.
//
// Derives from `content::RenderFrameObserver`. Not copyable.
class PdfOcrHelper : public content::RenderFrameObserver {
 public:
  // Callback type used to hand OCR output back to the creator of this helper.
  // It receives a vector of requests and a vector of tree updates, both by
  // value. Being a `RepeatingCallback`, it may be run more than once.
  // NOTE(review): presumably `tree_updates[i]` is the result for
  // `ocr_requests[i]` -- confirm in the .cc file.
  // NOTE(review): `base::RepeatingCallback` is not directly included by this
  // header (base/functional/callback.h); it presumably arrives transitively.
  using OnOcrDataReceivedCallback =
      base::RepeatingCallback<void(std::vector<PdfOcrRequest> ocr_requests,
                                   std::vector<ui::AXTreeUpdate> tree_updates)>;

  // `image_fetcher` is kept as a non-owning pointer (see `image_fetcher_`).
  // `root_node_id` and `page_count` describe the PDF being processed and can
  // be replaced later through `Reset()`. `callback` is the
  // `OnOcrDataReceivedCallback` to report results through.
  PdfOcrHelper(chrome_pdf::PdfAccessibilityImageFetcher* image_fetcher,
               content::RenderFrame& render_frame,
               ui::AXNodeID root_node_id,
               uint32_t page_count,
               OnOcrDataReceivedCallback callback);

  // Copying is disabled.
  PdfOcrHelper(const PdfOcrHelper&) = delete;
  PdfOcrHelper& operator=(const PdfOcrHelper&) = delete;

  ~PdfOcrHelper() override;

  // If the OCR Helper is created before the PDF is loaded or reloaded, i.e.
  // before `PdfAccessibilityTree::SetAccessibilityDocInfo` is called,
  // previous requests are removed and page count and root node are re-set.
  void Reset(ui::AXNodeID root_node_id, uint32_t page_count);
  // Submits the OCR requests for the images of one page. Per the member
  // comments below, requests from successive pages are concatenated into
  // `all_requests_`, and `is_ocr_in_progress_` decides whether this call or
  // the next `ReceiveOcrResultsForImage` triggers `OcrNextImage`.
  void OcrPage(base::queue<PdfOcrRequest> page_requests);
  // Read-only progress queries.
  // NOTE(review): presumably derived from `remaining_page_count_` and
  // `pages_per_batch_` -- confirm in the .cc file.
  bool AreAllPagesOcred() const;
  bool AreAllPagesInBatchOcred() const;
  // Test-only hook that supplies a `ScreenAIAnnotator` pending remote.
  // NOTE(review): presumably bound to `screen_ai_annotator_` -- confirm.
  void SetScreenAIAnnotatorForTesting(
      mojo::PendingRemote<screen_ai::mojom::ScreenAIAnnotator>
          screen_ai_annotator);
  // Test-only hook affecting `remaining_page_count_`.
  // NOTE(review): the value it is reset to is defined in the .cc file.
  void ResetRemainingPageCountForTesting();
  // Test-only accessor returning the current `pages_per_batch_`.
  uint32_t pages_per_batch_for_testing() const { return pages_per_batch_; }

  // content::RenderFrameObserver:
  // The body is intentionally empty: this object's lifetime is managed by its
  // owner (see the comment on `image_fetcher_`).
  void OnDestruct() override {}

 private:
  // Computes the batch size from `page_count`.
  // NOTE(review): the formula lives in the .cc file.
  static uint32_t ComputePagesPerBatch(uint32_t page_count);
  // Starts OCR for the next pending request. Called from `OcrPage` or from
  // `ReceiveOcrResultsForImage`, as described on `is_ocr_in_progress_`.
  void OcrNextImage();
  // Handles the OCR result (`tree_update`) for the image described by
  // `request`.
  void ReceiveOcrResultsForImage(PdfOcrRequest request,
                                 const ui::AXTreeUpdate& tree_update);

  // If `screen_ai_annotator_` is not connected to OCR service and
  // `render_frame_` is available, tries to connect it to the OCR service.
  // NOTE(review): no `render_frame_` member is declared in this class; the
  // comment presumably refers to the frame exposed by the
  // `content::RenderFrameObserver` base -- confirm.
  void MaybeConnectToOcrService();

  // `image_fetcher_` owns `this`.
  // NOTE(review): `raw_ptr` is not directly included by this header
  // (base/memory/raw_ptr.h); it presumably arrives via another include.
  const raw_ptr<chrome_pdf::PdfAccessibilityImageFetcher> image_fetcher_;

  // Number of pages per batch; exposed to tests through
  // `pages_per_batch_for_testing()`.
  uint32_t pages_per_batch_;
  // Count of pages still remaining.
  // NOTE(review): presumably pages not yet OCRed -- confirm in the .cc file.
  uint32_t remaining_page_count_;
  // Root accessibility node ID; `Reset()` takes a replacement value.
  ui::AXNodeID root_node_id_;

  // True if there are pending OCR requests. Used to determine if `OcrPage`
  // should call `OcrNextImage` or if the next call to
  // `ReceiveOcrResultsForImage` should do it instead. This avoids the
  // possibility of processing requests in the wrong order.
  bool is_ocr_in_progress_ = false;

  // A PDF is made up of a number of pages, and each page might have one or
  // more inaccessible images that need to be OCRed. This queue could contain
  // the OCR requests for all the images on several pages, so the requests
  // from each page are concatenated together into a single queue.
  // `PdfOcrRequest.is_last_on_page` indicates which request is the last on
  // each page.
  base::queue<PdfOcrRequest> all_requests_;
  // Requests and tree updates collected for the current batch. Their types
  // match the two parameters of `OnOcrDataReceivedCallback`.
  // NOTE(review): presumably handed to `on_ocr_data_received_callback_` when
  // a batch completes -- confirm in the .cc file.
  std::vector<PdfOcrRequest> batch_requests_;
  std::vector<ui::AXTreeUpdate> batch_tree_updates_;
  // Callback supplied at construction.
  OnOcrDataReceivedCallback on_ocr_data_received_callback_;
  // Mojo remote for the `ScreenAIAnnotator` interface of the OCR service.
  mojo::Remote<screen_ai::mojom::ScreenAIAnnotator> screen_ai_annotator_;
  // Declares the sequence checker member for this class.
  SEQUENCE_CHECKER(sequence_checker_);
  // Needs to be kept last so that it would be destructed first.
  base::WeakPtrFactory<PdfOcrHelper> weak_ptr_factory_{this};
};

}  // namespace pdf

#endif  // COMPONENTS_PDF_RENDERER_PDF_OCR_HELPER_H_