1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320
|
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim:set ts=2 sw=2 sts=2 et cindent: */
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#ifndef DOM_TEXTDIRECTIVECREATOR_H_
#define DOM_TEXTDIRECTIVECREATOR_H_
#include <tuple>
#include "RangeBoundary.h"
#include "TextDirectiveUtil.h"
#include "mozilla/RefPtr.h"
#include "mozilla/Result.h"
#include "mozilla/dom/fragmentdirectives_ffi_generated.h"
#include "nsStringFwd.h"
class nsRange;
namespace mozilla {
class ErrorResult;
}
namespace mozilla::dom {
class Document;
/**
* @brief Helper class to create a text directive string from a given `Range`.
*
* The class provides a public static creator function which encapsulates all
* necessary logic.
* This class serves as a base class that defines the main algorithm, and is
* subclassed twice for exact and range-based matching.
*/
class TextDirectiveCreator {
public:
/**
* @brief Static creator function. Takes a `Range` and creates a text
* directive string, if possible.
*
* @param aDocument The document in which `aInputRange` lives.
* @param aInputRange The input range. This range will not be modified.
* @param aWatchdog A watchdog to ensure the operation does not run
* longer than the predefined timeout.
*
* @return Returns a percent-encoded text directive string on success, an
* empty string if it's not possible to create a text fragment for the
* given range, or an error code.
*/
static Result<nsCString, ErrorResult> CreateTextDirectiveFromRange(
Document* aDocument, AbstractRange* aInputRange,
const TimeoutWatchdog* aWatchdog);
virtual ~TextDirectiveCreator();
protected:
TextDirectiveCreator(Document* aDocument, AbstractRange* aRange,
const TimeoutWatchdog* aWatchdog);
/**
* @brief Ensures the boundary points of the range point to word boundaries.
*
* This function always returns a new range.
*/
static Result<RefPtr<AbstractRange>, ErrorResult> ExtendRangeToWordBoundaries(
AbstractRange* aRange);
/**
* @brief Determines whether exact or range-based matching should be used.
*
* This function searches for a block boundary in `aRange`, which requires
* range-based matching. If there is no block boundary, but the range content
* is longer than a threshold, range-based matching is used as well.
* This threshold is defined by the pref
* `dom.text_fragments.create_text_fragment.exact_match_max_length`.
*
*/
static Result<bool, ErrorResult> MustUseRangeBasedMatching(
AbstractRange* aRange);
/**
* @brief Creates an instance either for exact or range-based matching.
*/
static Result<UniquePtr<TextDirectiveCreator>, ErrorResult> CreateInstance(
Document* aDocument, AbstractRange* aRange,
const TimeoutWatchdog* aWatchdog);
/**
* @brief Collects text content surrounding the target range.
*
* The context terms are then stored both in normal and fold case form.
*
* Returns false if the algorithm cannot continue, for example if the text
* directive must use range-based matching because of its length, but the
* target range only consists of one word.
*/
virtual Result<bool, ErrorResult> CollectContextTerms() = 0;
/**
* @brief Common helper which collects the prefix term of the target range.
*/
Result<Ok, ErrorResult> CollectPrefixContextTerm();
/**
* @brief Common helper which collects the suffix term of the target range.
*/
Result<Ok, ErrorResult> CollectSuffixContextTerm();
/**
* @brief Collect the word begin / word end distances for the context terms.
*
* For start (for range-based matching) and suffix terms, the search direction
* is left-to-right. Therefore, the distances are based off the beginning of
* the context terms and use the word end boundary.
*
* For prefix and end (for range-based matching), the search direction is
* right-to-left. Therefore, the distances are based off the end of the
* context terms and use the word start boundary.
*
* The distances are always sorted, so that the first entry points to the
* nearest word boundary in search direction.
*/
virtual void CollectContextTermWordBoundaryDistances() = 0;
/**
* @brief Searches the document for other occurrences of the target range and
* converts the results into a comparable format.
*
* This method searches the partial document from the beginning up to the
* target range for occurrences of the target range content.
* This needs to be done differently based on whether matching is exact or
* range-based. For exact matching, the whole text content of the target range
* is searched for. For range-based matching, two search runs are required:
* One for the minimal `start` term (ie., the first word), which ends at the
* beginning of the target range. And one for the minimal `end` term (ie., the
* last word), which starts at the beginning of the target range and ends
* _before_ its end.
* The resulting lists of matching ranges do not exclude the target range.
*/
virtual Result<Ok, ErrorResult> FindAllMatchingCandidates() = 0;
/**
* @brief Find all occurrences of `aSearchQuery` in the partial document.
*
* This method uses `nsFind` to perform a case-insensitive search for
* `aSearchQuery` in the partial document from `aSearchStart` to `aSearchEnd`.
*
* @return List of `Range`s which have the case-insensitive-same content as
* `aSearchQuery`.
*/
Result<nsTArray<RefPtr<AbstractRange>>, ErrorResult> FindAllMatchingRanges(
const nsString& aSearchQuery, const RangeBoundary& aSearchStart,
const RangeBoundary& aSearchEnd);
/**
* @brief Creates the shortest possible text directive.
*
* @return A percent-encoded string containing a text directive. Returns empty
* string in cases where it's not possible to create a text directive.
*/
Result<nsCString, ErrorResult> CreateTextDirective();
/**
* @brief Creates unique substring length arrays which are extended to the
* nearest word boundary.
*/
static std::tuple<nsTArray<uint32_t>, nsTArray<uint32_t>>
ExtendSubstringLengthsToWordBoundaries(
const nsTArray<std::tuple<uint32_t, uint32_t>>& aExactSubstringLengths,
const Span<const uint32_t>& aFirstWordPositions,
const Span<const uint32_t>& aSecondWordPositions);
/**
* @brief Test all combinations to identify the shortest text directive.
*/
virtual Maybe<TextDirective> FindShortestCombination() const = 0;
/**
* @brief Perform a brute-force optimization run to find the shortest
* combination of a combination of two context terms.
*
* Each combination of the extended values is compared against all exact
* values. It is only considered valid if at least one value is longer than
* the exact lengths.
*
* @param aExactWordLengths Array of tuples containing the exact
* common sub string lengths of this
* combination.
* @param aFirstExtendedToWordBoundaries All valid substring lengths for the
* first context term, extended to its
* next word boundary in reading
* direction.
* @param aSecondExtendedToWordBoundaries All valid substring lengths for the
* second context term, extended to its
* next word boundary in reading
* direction.
* @return A tuple of sub string lengths extended to word boundaries, which is
* the shortest allowed combination to eliminate all matches.
* Returns `Nothing` if it's not possible to eliminate all matches.
*/
static Maybe<std::tuple<uint32_t, uint32_t>> CheckAllCombinations(
const nsTArray<std::tuple<uint32_t, uint32_t>>& aExactWordLengths,
const nsTArray<uint32_t>& aFirstExtendedToWordBoundaries,
const nsTArray<uint32_t>& aSecondExtendedToWordBoundaries);
// The maximum length of the context terms around the target range.
// If a context term is longer, it will be truncated to this length.
// If -- which seems highly unlikely -- there is another match for the target
// range which happens to have a context term with the same content, it will
// essentially be ignored:
// The common length would be equal to this value, also the maximum common
// length extended to the word boundary. Therefore, the algorithm could not
// find a candidate which exceeds this length, therefore ignoring it
// altogether.
//
// If -- even more unlikely -- this condition would happen for _every_ context
// term, the algorithm would determine that it cannot create a text
// directive for the target range because it would be ambiguous.
static constexpr uint32_t kMaxContextTermLength = 1024;
nsString mPrefixContent;
nsString mPrefixFoldCaseContent;
nsTArray<uint32_t> mPrefixWordBeginDistances;
nsString mStartContent;
nsString mSuffixContent;
nsString mSuffixFoldCaseContent;
nsTArray<uint32_t> mSuffixWordEndDistances;
NotNull<RefPtr<Document>> mDocument;
NotNull<RefPtr<AbstractRange>> mRange;
NotNull<RefPtr<nsFind>> mFinder;
/**
* The watchdog ensures that the algorithm exits after a defined time
* duration, to ensure that the main thread is not blocked for too long.
*
* The duration is defined by the pref
* `dom.text_fragments.create_text_fragment.timeout`.
*/
RefPtr<const TimeoutWatchdog> mWatchdog;
nsContentUtils::NodeIndexCache mNodeIndexCache;
};
/**
* @brief Creator class which creates a range-based text directive.
*
*/
class RangeBasedTextDirectiveCreator : public TextDirectiveCreator {
private:
using TextDirectiveCreator::TextDirectiveCreator;
Result<bool, ErrorResult> CollectContextTerms() override;
void CollectContextTermWordBoundaryDistances() override;
Result<Ok, ErrorResult> FindAllMatchingCandidates() override;
void FindStartMatchCommonSubstringLengths(
const nsTArray<RefPtr<AbstractRange>>& aMatchRanges);
void FindEndMatchCommonSubstringLengths(
const nsTArray<RefPtr<AbstractRange>>& aMatchRanges);
Maybe<TextDirective> FindShortestCombination() const override;
nsString mEndContent;
// The fold case contents for start and end terms don't include the first/last
// word of the start and end terms, because they are only used for finding the
// common lengths for other matches.
nsString mStartFoldCaseContent;
nsString mEndFoldCaseContent;
// These values are only passed into nsFind, therefore fold case is not
// required.
nsString mFirstWordOfStartContent;
nsString mLastWordOfEndContent;
// The lengths of the first/last word of the start and end terms, including
// whitespace to the next word.
// Therefore, these values are equal to
// `m[Start|End]Content.Length() - m[Start|End]FoldCaseContent.Length()`.
uint32_t mStartFirstWordLengthIncludingWhitespace = 0;
uint32_t mEndLastWordLengthIncludingWhitespace = 0;
// The distances are bound to the Fold Case Content strings, which do not
// include the first/last word of the start and end terms.
nsTArray<uint32_t> mStartWordEndDistances;
nsTArray<uint32_t> mEndWordBeginDistances;
nsTArray<std::tuple<uint32_t, uint32_t>> mStartMatchCommonSubstringLengths;
nsTArray<std::tuple<uint32_t, uint32_t>> mEndMatchCommonSubstringLengths;
};
/**
* @brief Creator class which creates an exact match text directive.
*
*/
class ExactMatchTextDirectiveCreator : public TextDirectiveCreator {
private:
using TextDirectiveCreator::TextDirectiveCreator;
Result<bool, ErrorResult> CollectContextTerms() override;
void CollectContextTermWordBoundaryDistances() override;
Result<Ok, ErrorResult> FindAllMatchingCandidates() override;
void FindCommonSubstringLengths(
const nsTArray<RefPtr<AbstractRange>>& aMatchRanges);
Maybe<TextDirective> FindShortestCombination() const override;
nsTArray<std::tuple<uint32_t, uint32_t>> mCommonSubstringLengths;
};
} // namespace mozilla::dom
#endif
|