File: page_text_dump_result.h

package info (click to toggle)
chromium 138.0.7204.183-1~deb12u1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm-proposed-updates
  • size: 6,080,960 kB
  • sloc: cpp: 34,937,079; ansic: 7,176,967; javascript: 4,110,704; python: 1,419,954; asm: 946,768; xml: 739,971; pascal: 187,324; sh: 89,623; perl: 88,663; objc: 79,944; sql: 50,304; cs: 41,786; fortran: 24,137; makefile: 21,811; php: 13,980; tcl: 13,166; yacc: 8,925; ruby: 7,485; awk: 3,720; lisp: 3,096; lex: 1,327; ada: 727; jsp: 228; sed: 36
file content (156 lines) | stat: -rw-r--r-- 6,155 bytes parent folder | download | duplicates (7)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
// Copyright 2021 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef COMPONENTS_OPTIMIZATION_GUIDE_CONTENT_BROWSER_PAGE_TEXT_DUMP_RESULT_H_
#define COMPONENTS_OPTIMIZATION_GUIDE_CONTENT_BROWSER_PAGE_TEXT_DUMP_RESULT_H_

#include <stddef.h>

#include <iosfwd>
#include <optional>
#include <set>
#include <string>
#include <tuple>

#include "components/optimization_guide/content/mojom/page_text_service.mojom.h"
#include "content/public/browser/global_routing_id.h"

namespace optimization_guide {

// Holds the text dump from a single renderer together with its associated
// metadata. An instance goes through two phases:
//  * Preliminary - the frame metadata is known, but the text dump has not been
//    received yet.
//  * Completed - the text dump has been received and |contents()| is set.
class FrameTextDumpResult {
 public:
  ~FrameTextDumpResult();
  FrameTextDumpResult(const FrameTextDumpResult&);
  // Spelled out explicitly so that both copy operations are visible next to
  // the user-declared copy constructor and destructor, instead of relying on
  // the (deprecated) implicitly generated copy assignment.
  FrameTextDumpResult& operator=(const FrameTextDumpResult&) = default;

  // Creates a preliminary instance with the given metadata.
  static FrameTextDumpResult Initialize(mojom::TextDumpEvent event,
                                        content::GlobalRenderFrameHostId rfh_id,
                                        bool amp_frame,
                                        int unique_navigation_id);

  // Returns a copy of |this| that is completed with |contents|; |this| itself
  // is left untouched. This is only expected to be called once on Preliminary
  // instances.
  FrameTextDumpResult CompleteWithContents(
      const std::u16string& contents) const;

  // Whether the class instance is completed yet.
  bool IsCompleted() const;

  // The text dump contents. Set only for completed instances. Note that
  // the string must be treated as untrusted data.
  const std::optional<std::u16string>& contents() const { return contents_; }

  // The text dump contents, decoded to UTF-8 as a best effort. Set only for
  // completed instances. Note that the string must be treated as untrusted
  // data.
  std::optional<std::string> utf8_contents() const;

  // The event at which the text dump is taken. Set for both preliminary and
  // completed instances.
  mojom::TextDumpEvent event() const { return event_; }

  // The unique identifier for the content::RenderFrameHost that the text dump
  // was taken in. Set for both preliminary and completed instances.
  content::GlobalRenderFrameHostId rfh_id() const { return rfh_id_; }

  // The unique id of the visible navigation for this frame dump, taken from the
  // visible NavigationEntry.
  int unique_navigation_id() const { return unique_navigation_id_; }

  // Whether the frame the text dump is taken in is an AMP frame. Set for both
  // preliminary and completed instances.
  bool amp_frame() const { return amp_frame_; }

  // Ordering used when these objects are sorted (e.g. inside a std::set):
  // * AMP frames first - When there are AMP frames on a page, it is expected
  //   that they will contain the most content.
  // * Longer contents first - Most consumers will only be interested in some of
  //   the page text. In this case, ensure that the biggest blob of text comes
  //   first since that is most likely to be the main content on the page.
  // * Later events first - The later in the page's lifetime a text dump is
  //   taken, the more likely that it is complete.
  // * The remaining fields are compared in an arbitrary order, purely so that
  //   two instances are equivalent under this ordering only when every field
  //   that operator== inspects matches.
  bool operator<(const FrameTextDumpResult& rhs) const {
    if (amp_frame() != rhs.amp_frame()) {
      // |this| sorts first exactly when it is the AMP frame of the pair.
      return amp_frame();
    }

    // Instances without contents (preliminary) are treated as having length 0.
    const size_t lhs_size = contents() ? contents()->size() : 0;
    const size_t rhs_size = rhs.contents() ? rhs.contents()->size() : 0;
    if (lhs_size != rhs_size) {
      // Note the reverse ordering to put longer contents first.
      return lhs_size > rhs_size;
    }

    if (event() != rhs.event()) {
      // Note the reverse ordering to put later events first.
      return event() > rhs.event();
    }

    // Tie-breakers: lexicographic comparison of the fields not yet examined
    // (plus the actual contents, since only their sizes were compared above).
    return std::tie(rfh_id_, contents_, unique_navigation_id_) <
           std::tie(rhs.rfh_id_, rhs.contents_, rhs.unique_navigation_id_);
  }

  // Two instances are equal when all of their fields compare equal.
  bool operator==(const FrameTextDumpResult& other) const {
    return std::tie(event_, contents_, rfh_id_, amp_frame_,
                    unique_navigation_id_) ==
           std::tie(other.event_, other.contents_, other.rfh_id_,
                    other.amp_frame_, other.unique_navigation_id_);
  }

 private:
  // Instances are only created through Initialize() or by copying.
  FrameTextDumpResult();

  // NOTE(review): |event_| is the only scalar member without a default member
  // initializer; presumably it is always assigned by Initialize() - confirm
  // against the out-of-line default constructor.
  mojom::TextDumpEvent event_;
  std::optional<std::u16string> contents_;
  content::GlobalRenderFrameHostId rfh_id_;
  bool amp_frame_ = false;
  int unique_navigation_id_ = -1;
};

// Contains 0 or more FrameTextDumpResults from the same page load. The frame
// results are kept in a std::set, so they are iterated in the order defined by
// FrameTextDumpResult::operator<.
class PageTextDumpResult {
 public:
  PageTextDumpResult();
  PageTextDumpResult(const PageTextDumpResult&);
  // Spelled out explicitly so that both copy operations are visible next to
  // the user-declared copy constructor and destructor, instead of relying on
  // the (deprecated) implicitly generated copy assignment.
  PageTextDumpResult& operator=(const PageTextDumpResult&) = default;
  ~PageTextDumpResult();

  // Adds another frame text dump to |this|.
  void AddFrameTextDumpResult(const FrameTextDumpResult& frame_result);

  // Returns the concatenation of all AMP frames. nullopt if no AMP frames are
  // present. Note that the string must be treated as untrusted data.
  std::optional<std::string> GetAMPTextContent() const;

  // Returns the concatenation of the mainframe. nullopt if not present.
  // Note that the string must be treated as untrusted data.
  std::optional<std::string> GetMainFrameTextContent() const;

  // Returns the concatenation of all frames, AMP or main. nullopt if |empty()|.
  // Note that the string must be treated as untrusted data.
  std::optional<std::string> GetAllFramesTextContent() const;

  // True when no frame text dump has been added.
  bool empty() const { return frame_results_.empty(); }

  // Read-only access to every frame result held by |this|.
  const std::set<FrameTextDumpResult>& frame_results() const {
    return frame_results_;
  }

  // Two page results are equal when their sets of frame results are equal.
  bool operator==(const PageTextDumpResult& other) const {
    return frame_results_ == other.frame_results_;
  }

 private:
  std::set<FrameTextDumpResult> frame_results_;
};

// Stream insertion operators for FrameTextDumpResult and PageTextDumpResult.
// Useful for debugging. Only declared in this header; the definitions are
// provided elsewhere.
std::ostream& operator<<(std::ostream& os, const FrameTextDumpResult& frame);
std::ostream& operator<<(std::ostream& os, const PageTextDumpResult& page);

}  // namespace optimization_guide

#endif  // COMPONENTS_OPTIMIZATION_GUIDE_CONTENT_BROWSER_PAGE_TEXT_DUMP_RESULT_H_