File: TextDirectiveCreator.h

package info (click to toggle)
firefox 143.0.3-1
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 4,617,328 kB
  • sloc: cpp: 7,478,492; javascript: 6,417,157; ansic: 3,720,058; python: 1,396,372; xml: 627,523; asm: 438,677; java: 186,156; sh: 63,477; makefile: 19,171; objc: 13,059; perl: 12,983; yacc: 4,583; cs: 3,846; pascal: 3,405; lex: 1,720; ruby: 1,003; exp: 762; php: 436; lisp: 258; awk: 247; sql: 66; sed: 53; csh: 10
file content (320 lines) | stat: -rw-r--r-- 12,574 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim:set ts=2 sw=2 sts=2 et cindent: */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#ifndef DOM_TEXTDIRECTIVECREATOR_H_
#define DOM_TEXTDIRECTIVECREATOR_H_

#include <tuple>

#include "RangeBoundary.h"
#include "TextDirectiveUtil.h"
#include "mozilla/RefPtr.h"
#include "mozilla/Result.h"
#include "mozilla/dom/fragmentdirectives_ffi_generated.h"
#include "nsStringFwd.h"

class nsRange;

namespace mozilla {
class ErrorResult;
}

namespace mozilla::dom {
class Document;
/**
 * @brief Helper class to create a text directive string from a given `Range`.
 *
 * The class provides a public static creator function which encapsulates all
 * necessary logic.
 * This class serves as a base class that defines the main algorithm, and is
 * subclassed twice for exact and range-based matching.
 */
class TextDirectiveCreator {
 public:
  /**
   * @brief Static creator function. Takes a `Range` and creates a text
   *        directive string, if possible.
   *
   * @param aDocument   The document in which `aInputRange` lives.
   * @param aInputRange The input range. This range will not be modified.
   * @param aWatchdog   A watchdog to ensure the operation does not run
   *                    longer than the predefined timeout.
   *
   * @return Returns a percent-encoded text directive string on success, an
   *         empty string if it's not possible to create a text fragment for the
   *         given range, or an error code.
   */
  static Result<nsCString, ErrorResult> CreateTextDirectiveFromRange(
      Document* aDocument, AbstractRange* aInputRange,
      const TimeoutWatchdog* aWatchdog);

  virtual ~TextDirectiveCreator();

 protected:
  TextDirectiveCreator(Document* aDocument, AbstractRange* aRange,
                       const TimeoutWatchdog* aWatchdog);

  /**
   * @brief Ensures the boundary points of the range point to word boundaries.
   *
   * This function always returns a new range.
   */
  static Result<RefPtr<AbstractRange>, ErrorResult> ExtendRangeToWordBoundaries(
      AbstractRange* aRange);

  /**
   * @brief Determines whether exact or range-based matching should be used.
   *
   * This function searches for a block boundary in `aRange`, which requires
   * range-based matching. If there is no block boundary, but the range content
   * is longer than a threshold, range-based matching is used as well.
   * This threshold is defined by the pref
   * `dom.text_fragments.create_text_fragment.exact_match_max_length`.
   *
   */
  static Result<bool, ErrorResult> MustUseRangeBasedMatching(
      AbstractRange* aRange);

  /**
   * @brief Creates an instance either for exact or range-based matching.
   */
  static Result<UniquePtr<TextDirectiveCreator>, ErrorResult> CreateInstance(
      Document* aDocument, AbstractRange* aRange,
      const TimeoutWatchdog* aWatchdog);

  /**
   * @brief Collects text content surrounding the target range.
   *
   * The context terms are then stored both in normal and fold case form.
   *
   * Returns false if the algorithm cannot continue, for example if the text
   * directive must use range-based matching because of its length, but the
   * target range only consists of one word.
   */
  virtual Result<bool, ErrorResult> CollectContextTerms() = 0;

  /**
   * @brief Common helper which collects the prefix term of the target range.
   */
  Result<Ok, ErrorResult> CollectPrefixContextTerm();

  /**
   * @brief Common helper which collects the suffix term of the target range.
   */
  Result<Ok, ErrorResult> CollectSuffixContextTerm();

  /**
   * @brief Collect the word begin / word end distances for the context terms.
   *
   * For start (for range-based matching) and suffix terms, the search direction
   * is left-to-right. Therefore, the distances are based off the beginning of
   * the context terms and use the word end boundary.
   *
   * For prefix and end (for range-based matching), the search direction is
   * right-to-left. Therefore, the distances are based off the end of the
   * context terms and use the word start boundary.
   *
   * The distances are always sorted, so that the first entry points to the
   * nearest word boundary in search direction.
   */
  virtual void CollectContextTermWordBoundaryDistances() = 0;

  /**
   * @brief Searches the document for other occurrences of the target range and
   *        converts the results into a comparable format.
   *
   * This method searches the partial document from the beginning up to the
   * target range for occurrences of the target range content.
   * This needs to be done differently based on whether matching is exact or
   * range-based. For exact matching, the whole text content of the target range
   * is searched for. For range-based matching, two search runs are required:
   * One for the minimal `start` term (ie., the first word), which ends at the
   * beginning of the target range. And one for the minimal `end` term (ie., the
   * last word), which starts at the beginning of the target range and ends
   * _before_ its end.
   * The resulting lists of matching ranges do not exclude the target range.
   */
  virtual Result<Ok, ErrorResult> FindAllMatchingCandidates() = 0;

  /**
   * @brief Find all occurrences of `aSearchQuery` in the partial document.
   *
   * This method uses `nsFind` to perform a case-insensitive search for
   * `aSearchQuery` in the partial document from `aSearchStart` to `aSearchEnd`.
   *
   * @return List of `Range`s which have the case-insensitive-same content as
   *         `aSearchQuery`.
   */
  Result<nsTArray<RefPtr<AbstractRange>>, ErrorResult> FindAllMatchingRanges(
      const nsString& aSearchQuery, const RangeBoundary& aSearchStart,
      const RangeBoundary& aSearchEnd);

  /**
   * @brief Creates the shortest possible text directive.
   *
   * @return A percent-encoded string containing a text directive. Returns empty
   *         string in cases where it's not possible to create a text directive.
   */
  Result<nsCString, ErrorResult> CreateTextDirective();

  /**
   * @brief Creates unique substring length arrays which are extended to the
   *        nearest word boundary.
   */
  static std::tuple<nsTArray<uint32_t>, nsTArray<uint32_t>>
  ExtendSubstringLengthsToWordBoundaries(
      const nsTArray<std::tuple<uint32_t, uint32_t>>& aExactSubstringLengths,
      const Span<const uint32_t>& aFirstWordPositions,
      const Span<const uint32_t>& aSecondWordPositions);

  /**
   * @brief Test all combinations to identify the shortest text directive.
   */
  virtual Maybe<TextDirective> FindShortestCombination() const = 0;

  /**
   * @brief Perform a brute-force optimization run to find the shortest
   *        combination of a combination of two context terms.
   *
   * Each combination of the extended values is compared against all exact
   * values. It is only considered valid if at least one value is longer than
   * the exact lengths.
   *
   * @param aExactWordLengths               Array of tuples containing the exact
   *                                        common sub string lengths of this
   *                                        combination.
   * @param aFirstExtendedToWordBoundaries  All valid substring lengths for the
   *                                        first context term, extended to its
   *                                        next word boundary in reading
   *                                        direction.
   * @param aSecondExtendedToWordBoundaries All valid substring lengths for the
   *                                        second context term, extended to its
   *                                        next word boundary in reading
   *                                        direction.
   * @return A tuple of sub string lengths extended to word boundaries, which is
   *         the shortest allowed combination to eliminate all matches.
   *         Returns `Nothing` if it's not possible to eliminate all matches.
   */
  static Maybe<std::tuple<uint32_t, uint32_t>> CheckAllCombinations(
      const nsTArray<std::tuple<uint32_t, uint32_t>>& aExactWordLengths,
      const nsTArray<uint32_t>& aFirstExtendedToWordBoundaries,
      const nsTArray<uint32_t>& aSecondExtendedToWordBoundaries);

  // The maximum length of the context terms around the target range.
  // If a context term is longer, it will be truncated to this length.
  // If -- which seems highly unlikely -- there is another match for the target
  // range which happens to have a context term with the same content, it will
  // essentially be ignored:
  // The common length would be equal to this value, also the maximum common
  // length extended to the word boundary. Therefore, the algorithm could not
  // find a candidate which exceeds this length, therefore ignoring it
  // altogether.
  //
  // If -- even more unlikely -- this condition would happen for _every_ context
  // term, the algorithm would determine that it cannot create a text
  // directive for the target range because it would be ambiguous.
  static constexpr uint32_t kMaxContextTermLength = 1024;

  nsString mPrefixContent;
  nsString mPrefixFoldCaseContent;
  nsTArray<uint32_t> mPrefixWordBeginDistances;

  nsString mStartContent;

  nsString mSuffixContent;
  nsString mSuffixFoldCaseContent;
  nsTArray<uint32_t> mSuffixWordEndDistances;

  NotNull<RefPtr<Document>> mDocument;
  NotNull<RefPtr<AbstractRange>> mRange;

  NotNull<RefPtr<nsFind>> mFinder;

  /**
   * The watchdog ensures that the algorithm exits after a defined time
   * duration, to ensure that the main thread is not blocked for too long.
   *
   * The duration is defined by the pref
   * `dom.text_fragments.create_text_fragment.timeout`.
   */
  RefPtr<const TimeoutWatchdog> mWatchdog;

  nsContentUtils::NodeIndexCache mNodeIndexCache;
};

/**
 * @brief Creator class which creates a range-based text directive.
 *
 */
class RangeBasedTextDirectiveCreator : public TextDirectiveCreator {
 private:
  using TextDirectiveCreator::TextDirectiveCreator;

  Result<bool, ErrorResult> CollectContextTerms() override;

  void CollectContextTermWordBoundaryDistances() override;

  Result<Ok, ErrorResult> FindAllMatchingCandidates() override;

  void FindStartMatchCommonSubstringLengths(
      const nsTArray<RefPtr<AbstractRange>>& aMatchRanges);

  void FindEndMatchCommonSubstringLengths(
      const nsTArray<RefPtr<AbstractRange>>& aMatchRanges);

  Maybe<TextDirective> FindShortestCombination() const override;

  nsString mEndContent;
  // The fold case contents for start and end terms don't include the first/last
  // word of the start and end terms, because they are only used for finding the
  // common lengths for other matches.
  nsString mStartFoldCaseContent;
  nsString mEndFoldCaseContent;

  // These values are only passed into nsFind, therefore fold case is not
  // required.
  nsString mFirstWordOfStartContent;
  nsString mLastWordOfEndContent;

  // The lengths of the first/last word of the start and end terms, including
  // whitespace to the next word.
  // Therefore, these values are equal to
  // `m[Start|End]Content.Length() - m[Start|End]FoldCaseContent.Length()`.
  uint32_t mStartFirstWordLengthIncludingWhitespace = 0;
  uint32_t mEndLastWordLengthIncludingWhitespace = 0;

  // The distances are bound to the Fold Case Content strings, which do not
  // include the first/last word of the start and end terms.
  nsTArray<uint32_t> mStartWordEndDistances;
  nsTArray<uint32_t> mEndWordBeginDistances;

  nsTArray<std::tuple<uint32_t, uint32_t>> mStartMatchCommonSubstringLengths;
  nsTArray<std::tuple<uint32_t, uint32_t>> mEndMatchCommonSubstringLengths;
};

/**
 * @brief Creator class which creates an exact match text directive.
 *
 */
class ExactMatchTextDirectiveCreator : public TextDirectiveCreator {
 private:
  using TextDirectiveCreator::TextDirectiveCreator;

  Result<bool, ErrorResult> CollectContextTerms() override;

  void CollectContextTermWordBoundaryDistances() override;

  Result<Ok, ErrorResult> FindAllMatchingCandidates() override;

  void FindCommonSubstringLengths(
      const nsTArray<RefPtr<AbstractRange>>& aMatchRanges);

  Maybe<TextDirective> FindShortestCombination() const override;

  nsTArray<std::tuple<uint32_t, uint32_t>> mCommonSubstringLengths;
};
}  // namespace mozilla::dom
#endif