File: TextDirectiveFinder.cpp

package info (click to toggle)
firefox 147.0-1
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 4,683,324 kB
  • sloc: cpp: 7,607,156; javascript: 6,532,492; ansic: 3,775,158; python: 1,415,368; xml: 634,556; asm: 438,949; java: 186,241; sh: 62,751; makefile: 18,079; objc: 13,092; perl: 12,808; yacc: 4,583; cs: 3,846; pascal: 3,448; lex: 1,720; ruby: 1,003; php: 436; lisp: 258; awk: 247; sql: 66; sed: 54; csh: 10; exp: 6
file content (434 lines) | stat: -rw-r--r-- 20,221 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim:set ts=2 sw=2 sts=2 et cindent: */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#include "TextDirectiveFinder.h"

#include "Document.h"
#include "TextDirectiveUtil.h"
#include "fragmentdirectives_ffi_generated.h"
#include "mozilla/CycleCollectedUniquePtr.h"
#include "mozilla/ToString.h"
#include "mozilla/glean/DomMetrics.h"
#include "nsFind.h"
#include "nsRange.h"

namespace mozilla::dom {

TextDirectiveFinder::TextDirectiveFinder(
    Document* aDocument, nsTArray<TextDirective>&& aTextDirectives)
    : mDocument(WrapNotNull(aDocument)),
      mUninvokedTextDirectives(std::move(aTextDirectives)) {}

TextDirectiveFinder::~TextDirectiveFinder() {
  if (mFoundDirectiveCount) {
    glean::dom_textfragment::find_directives.AccumulateRawDuration(
        mFindTextDirectivesDuration);

    TEXT_FRAGMENT_LOG("Found {} directives in {}ms", mFoundDirectiveCount,
                      mFindTextDirectivesDuration.ToMilliseconds());
  }
  if (HasUninvokedDirectives()) {
    mDocument->SetUseCounter(eUseCounter_custom_InvalidTextDirectives);
  }
}

void TextDirectiveFinder::Traverse(
    nsCycleCollectionTraversalCallback& aCallback) {
  CycleCollectionNoteChild(aCallback, mDocument.get().get(),
                           "TextDirectiveFinder::mDocument", aCallback.Flags());
}

bool TextDirectiveFinder::HasUninvokedDirectives() const {
  return !mUninvokedTextDirectives.IsEmpty();
}

nsTArray<RefPtr<nsRange>> TextDirectiveFinder::FindTextDirectivesInDocument() {
  if (mUninvokedTextDirectives.IsEmpty()) {
    return {};
  }

  const TimeStamp start = TimeStamp::Now();

  auto uri = TextDirectiveUtil::ShouldLog() && mDocument->GetDocumentURI()
                 ? mDocument->GetDocumentURI()->GetSpecOrDefault()
                 : nsCString();
  TEXT_FRAGMENT_LOG("Trying to find text directives in document '{}'.", uri);
  mDocument->FlushPendingNotifications(FlushType::Layout);
  // https://wicg.github.io/scroll-to-text-fragment/#invoke-text-directives
  // To invoke text directives, given as input a list of text directives text
  // directives and a Document document, run these steps:
  // 1. Let ranges be a list of ranges, initially empty.
  nsTArray<RefPtr<nsRange>> textDirectiveRanges(
      mUninvokedTextDirectives.Length());

  // Additionally (not mentioned in the spec), remove all text directives from
  // the input list to keep only the ones that are not found.
  // This code runs repeatedly during a page load, so it is possible that the
  // match for a text directive has not been parsed yet.
  nsTArray<TextDirective> uninvokedTextDirectives(
      mUninvokedTextDirectives.Length());

  // 2. For each text directive directive of text directives:
  for (TextDirective& textDirective : mUninvokedTextDirectives) {
    // 2.1 If the result of running find a range from a text directive given
    //     directive and document is non-null, then append it to ranges.
    if (RefPtr<nsRange> range = FindRangeForTextDirective(textDirective)) {
      textDirectiveRanges.AppendElement(range);
      TEXT_FRAGMENT_LOG("Found text directive '{}'",
                        ToString(textDirective).c_str());
      if (RefPtr startNode = range->GetStartContainer()) {
        startNode->QueueAncestorRevealingAlgorithm();
      }
    } else {
      uninvokedTextDirectives.AppendElement(std::move(textDirective));
    }
  }
  if (TextDirectiveUtil::ShouldLog()) {
    if (uninvokedTextDirectives.Length() == mUninvokedTextDirectives.Length()) {
      TEXT_FRAGMENT_LOG("Did not find any of the {} uninvoked text directives.",
                        mUninvokedTextDirectives.Length());
    } else {
      TEXT_FRAGMENT_LOG(
          "Found {} of {} text directives in the document.",
          mUninvokedTextDirectives.Length() - uninvokedTextDirectives.Length(),
          mUninvokedTextDirectives.Length());
    }
    if (uninvokedTextDirectives.IsEmpty()) {
      TEXT_FRAGMENT_LOG("No uninvoked text directives left.");
    } else {
      TEXT_FRAGMENT_LOG("There are {} uninvoked text directives left:",
                        uninvokedTextDirectives.Length());
      for (size_t index = 0; index < uninvokedTextDirectives.Length();
           ++index) {
        TEXT_FRAGMENT_LOG(" [{}]: {}", index,
                          ToString(uninvokedTextDirectives[index]).c_str());
      }
    }
  }
  mUninvokedTextDirectives = std::move(uninvokedTextDirectives);

  mFindTextDirectivesDuration += TimeStamp::Now() - start;
  mFoundDirectiveCount += static_cast<int64_t>(textDirectiveRanges.Length());

  // 3. Return ranges.
  return textDirectiveRanges;
}

RefPtr<nsRange> TextDirectiveFinder::FindRangeForTextDirective(
    const TextDirective& aTextDirective) {
  // This method follows this spec algorithm and applies some changes:
  // https://wicg.github.io/scroll-to-text-fragment/#find-a-range-from-a-text-directive
  TEXT_FRAGMENT_LOG("Find range for text directive '{}'.",
                    ToString(aTextDirective).c_str());
  // 1. Let searchRange be a range with start (document, 0) and end (document,
  // document’s length)
  ErrorResult rv;
  RefPtr<nsRange> searchRange =
      nsRange::Create(mDocument, 0, mDocument, mDocument->Length(), rv);
  if (rv.Failed()) {
    return nullptr;
  }

  nsContentUtils::NodeIndexCache nodeIndexCache;
  RefPtr<nsFind> finder = new nsFind();
  finder->SetNodeIndexCache(&nodeIndexCache);

  // 2. While searchRange is not collapsed:
  while (!searchRange->Collapsed()) {
    // 2.1. Let potentialMatch be null.
    RefPtr<nsRange> potentialMatch;
    // 2.2. If parsedValues’s prefix is not null:
    if (!aTextDirective.prefix.IsEmpty()) {
      // 2.2.1. Let prefixMatch be the the result of running the find a string
      // in range steps with query parsedValues’s prefix, searchRange
      // searchRange, wordStartBounded true and wordEndBounded false.
      RefPtr<nsRange> prefixMatch = TextDirectiveUtil::FindStringInRange(
          finder, searchRange->StartRef(), searchRange->EndRef(),
          aTextDirective.prefix, true, false);
      // 2.2.2. If prefixMatch is null, return null.
      if (!prefixMatch) {
        TEXT_FRAGMENT_LOG(
            "Did not find prefix '{}'. The text directive does not exist "
            "in the document.",
            NS_ConvertUTF16toUTF8(aTextDirective.prefix));
        return nullptr;
      }
      TEXT_FRAGMENT_LOG("Did find prefix '{}'.",
                        NS_ConvertUTF16toUTF8(aTextDirective.prefix));

      // 2.2.3. Set searchRange’s start to the first boundary point after
      // prefixMatch’s start
      MOZ_DIAGNOSTIC_ASSERT(prefixMatch->GetStartContainer()->IsText());
      const RangeBoundary boundaryPoint =
          TextDirectiveUtil::MoveToNextBoundaryPoint(prefixMatch->StartRef());
      if (!boundaryPoint.IsSetAndValid()) {
        return nullptr;
      }
      searchRange->SetStart(boundaryPoint.AsRaw(), rv);
      if (rv.Failed()) {
        return nullptr;
      }

      // 2.2.4. Let matchRange be a range whose start is prefixMatch’s end and
      // end is searchRange’s end.
      // Note:
      // The spec is very inefficient. The start text must _immediately_ follow
      // after the end of the prefix. Therefore, it would be a huge waste to
      // search until the end of the document. Since the following `start`
      // attribute can't go across a block boundary, it is sufficient to do a
      // search until the next block boundary.
      RefPtr<nsRange> matchRange = nsRange::Create(
          prefixMatch->GetEndContainer(), prefixMatch->EndOffset(),
          searchRange->GetEndContainer(), searchRange->EndOffset(), rv);
      if (rv.Failed()) {
        return nullptr;
      }
      // 2.2.5. Advance matchRange’s start to the next non-whitespace position.
      TextDirectiveUtil::AdvanceStartToNextNonWhitespacePosition(*matchRange);
      // 2.2.6. If matchRange is collapsed return null.
      // (This can happen if prefixMatch’s end or its subsequent non-whitespace
      // position is at the end of the document.)
      if (matchRange->Collapsed()) {
        return nullptr;
      }
      // 2.2.7. Assert: matchRange’s start node is a Text node.
      // (matchRange’s start now points to the next non-whitespace text data
      // following a matched prefix.)
      MOZ_ASSERT(matchRange->GetStartContainer()->IsText());
      // Set `matchRange`s end to the next block boundary.
      auto nextBlockBoundary =
          TextDirectiveUtil::FindNextBlockBoundary<TextScanDirection::Right>(
              matchRange->StartRef());

      matchRange->SetEnd(nextBlockBoundary.AsRaw(), IgnoreErrors());

      // 2.2.8. Let mustEndAtWordBoundary be true if parsedValues’s end is
      // non-null or parsedValues’s suffix is null, false otherwise.
      const bool mustEndAtWordBoundary =
          !aTextDirective.end.IsEmpty() || aTextDirective.suffix.IsEmpty();
      // 2.2.9. Set potentialMatch to the result of running the find a string in
      // range steps with query parsedValues’s start, searchRange matchRange,
      // wordStartBounded false, and wordEndBounded mustEndAtWordBoundary.
      potentialMatch = TextDirectiveUtil::FindStringInRange(
          finder, matchRange->StartRef(), matchRange->EndRef(),
          aTextDirective.start, false, mustEndAtWordBoundary);
      // 2.2.10. If potentialMatch is null, return null.
      // Note: Because the search range for start only goes to the next block
      // boundary, this statement is wrong. If potentialMatch is null, the loop
      // needs to be restarted.
      if (!potentialMatch) {
        TEXT_FRAGMENT_LOG(
            "Did not find start '{}' in the sub range of the end of `prefix` "
            "and the next block boundary. Restarting outer loop.",
            NS_ConvertUTF16toUTF8(aTextDirective.start));
        continue;
      }
      // 2.2.11. If potentialMatch’s start is not matchRange’s start, then
      // continue.
      // (In this case, we found a prefix but it was followed by something other
      // than a matching text so we’ll continue searching for the next instance
      // of prefix.)
      if (potentialMatch->StartRef() != matchRange->StartRef()) {
        TEXT_FRAGMENT_LOG(
            "The prefix is not directly followed by the start element. "
            "Restarting outer loop.");
        continue;
      }
      TEXT_FRAGMENT_LOG("Did find start '{}'.",
                        NS_ConvertUTF16toUTF8(aTextDirective.start));
    }
    // 2.3. Otherwise:
    else {
      // 2.3.1. Let mustEndAtWordBoundary be true if parsedValues’s end is
      // non-null or parsedValues’s suffix is null, false otherwise.
      const bool mustEndAtWordBoundary =
          !aTextDirective.end.IsEmpty() || aTextDirective.suffix.IsEmpty();
      // 2.3.2. Set potentialMatch to the result of running the find a string in
      // range steps with query parsedValues’s start, searchRange searchRange,
      // wordStartBounded true, and wordEndBounded mustEndAtWordBoundary.
      potentialMatch = TextDirectiveUtil::FindStringInRange(
          finder, searchRange->StartRef(), searchRange->EndRef(),
          aTextDirective.start, true, mustEndAtWordBoundary);
      // 2.3.3. If potentialMatch is null, return null.
      if (!potentialMatch) {
        TEXT_FRAGMENT_LOG(
            "Did not find start '{}'. The text directive does not exist "
            "in the document.",
            NS_ConvertUTF16toUTF8(aTextDirective.start));
        return nullptr;
      }
      if (potentialMatch && aTextDirective.end.IsEmpty() &&
          aTextDirective.suffix.IsEmpty()) {
        return potentialMatch;
      }
      // 2.3.4. Set searchRange’s start to the first boundary point after
      // potentialMatch’s start
      MOZ_DIAGNOSTIC_ASSERT(potentialMatch->GetStartContainer()->IsText());
      const RangeBoundary newRangeBoundary =
          TextDirectiveUtil::MoveToNextBoundaryPoint(
              potentialMatch->StartRef());

      if (!newRangeBoundary.IsSetAndValid()) {
        return nullptr;
      }
      searchRange->SetStart(newRangeBoundary.AsRaw(), rv);
      if (rv.Failed()) {
        return nullptr;
      }
    }
    // 2.4. Let rangeEndSearchRange be a range whose start is potentialMatch’s
    // end and whose end is searchRange’s end.
    RefPtr<nsRange> rangeEndSearchRange = nsRange::Create(
        potentialMatch->GetEndContainer(), potentialMatch->EndOffset(),
        searchRange->GetEndContainer(), searchRange->EndOffset(), rv);
    if (rv.Failed()) {
      return nullptr;
    }
    // 2.5. While rangeEndSearchRange is not collapsed:
    while (!rangeEndSearchRange->Collapsed()) {
      // 2.5.1. If parsedValues’s end item is non-null, then:
      if (!aTextDirective.end.IsEmpty()) {
        // 2.5.1.1. Let mustEndAtWordBoundary be true if parsedValues’s suffix
        // is null, false otherwise.
        const bool mustEndAtWordBoundary = aTextDirective.suffix.IsEmpty();
        // 2.5.1.2. Let endMatch be the result of running the find a string in
        // range steps with query parsedValues’s end, searchRange
        // rangeEndSearchRange, wordStartBounded true, and wordEndBounded
        // mustEndAtWordBoundary.
        RefPtr<nsRange> endMatch = TextDirectiveUtil::FindStringInRange(
            finder, rangeEndSearchRange->StartRef(),
            rangeEndSearchRange->EndRef(), aTextDirective.end, true,
            mustEndAtWordBoundary);
        // 2.5.1.3. If endMatch is null then return null.
        if (!endMatch) {
          TEXT_FRAGMENT_LOG(
              "Did not find end '{}'. The text directive does not exist "
              "in the document.",
              NS_ConvertUTF16toUTF8(aTextDirective.end));
          return nullptr;
        }
        // 2.5.1.4. Set potentialMatch’s end to endMatch’s end.
        potentialMatch->SetEnd(endMatch->GetEndContainer(),
                               endMatch->EndOffset());
      }
      // 2.5.2. Assert: potentialMatch is non-null, not collapsed and represents
      // a range exactly containing an instance of matching text.
      MOZ_ASSERT(potentialMatch && !potentialMatch->Collapsed());

      // 2.5.3. If parsedValues’s suffix is null, return potentialMatch.
      if (aTextDirective.suffix.IsEmpty()) {
        TEXT_FRAGMENT_LOG("Did find a match.");
        return potentialMatch;
      }
      // 2.5.4. Let suffixRange be a range with start equal to potentialMatch’s
      // end and end equal to searchRange’s end.
      // Note: Again, this is highly inefficient. It's perfectly fine to only
      // search up to the next block boundary.
      RefPtr<nsRange> suffixRange = nsRange::Create(
          potentialMatch->GetEndContainer(), potentialMatch->EndOffset(),
          searchRange->GetEndContainer(), searchRange->EndOffset(), rv);
      if (rv.Failed()) {
        return nullptr;
      }
      // 2.5.5. Advance suffixRange's start to the next non-whitespace position.
      TextDirectiveUtil::AdvanceStartToNextNonWhitespacePosition(*suffixRange);
      auto nextBlockBoundary =
          TextDirectiveUtil::FindNextBlockBoundary<TextScanDirection::Right>(
              suffixRange->StartRef());
      suffixRange->SetEnd(nextBlockBoundary.AsRaw(), IgnoreErrors());

      // 2.5.6. Let suffixMatch be result of running the find a string in range
      // steps with query parsedValue's suffix, searchRange suffixRange,
      // wordStartBounded false, and wordEndBounded true.
      RefPtr<nsRange> suffixMatch = TextDirectiveUtil::FindStringInRange(
          finder, suffixRange->StartRef(), suffixRange->EndRef(),
          aTextDirective.suffix, false, true);
      // 2.5.7. If suffixMatch is null, return null.
      // (If the suffix doesn't appear in the remaining text of the document,
      // there's no possible way to make a match.)
      // 2.5.8. If suffixMatch's start is suffixRange's start, return
      // potentialMatch.
      // 2.5.9. If parsedValue's end item is null then break;
      // (If this is an exact match and the suffix doesn’t match, start
      // searching for the next range start by breaking out of this loop without
      // rangeEndSearchRange being collapsed. If we’re looking for a range
      // match, we’ll continue iterating this inner loop since the range start
      // will already be correct.)
      // 2.5.10. Set rangeEndSearchRange's start to potentialMatch's end.
      // (Otherwise, it is possible that we found the correct range start, but
      // not the correct range end. Continue the inner loop to keep searching
      // for another matching instance of rangeEnd.)
      // Note: the steps above are not correct anymore because of restricting
      // the suffix find to a sub range.
      // Therefore, the code looks different, but _essentially_ does the same as
      // what's described in the spec steps.
      rangeEndSearchRange->SetStart(potentialMatch->GetEndContainer(),
                                    potentialMatch->EndOffset());
      if (!suffixMatch) {
        if (aTextDirective.end.IsEmpty()) {
          TEXT_FRAGMENT_LOG(
              "Did not find suffix in the sub range of the end of `start` and "
              "the next block boundary. Restarting outer loop.");
          break;
        }
        TEXT_FRAGMENT_LOG(
            "Did not find suffix in the sub range of the end of `end` and the "
            "next block boundary. Discarding this `end` candidate and "
            "continuing inner loop.");
        continue;
      }
      if (suffixMatch->GetStartContainer() ==
              suffixRange->GetStartContainer() &&
          suffixMatch->StartOffset() == suffixRange->StartOffset()) {
        TEXT_FRAGMENT_LOG("Did find a match.");
        return potentialMatch;
      }
      if (aTextDirective.end.IsEmpty()) {
        TEXT_FRAGMENT_LOG(
            "Did find suffix in the sub range of end of `start` to the end of "
            "the next block boundary, but not at the start. Restarting outer "
            "loop.");
        break;
      }
      TEXT_FRAGMENT_LOG(
          "Did find `suffix` in the sub range of end of `end` to the end of "
          "the current block, but not at the start. Restarting inner loop.");
    }
    // 2.6. If rangeEndSearchRange is collapsed then:
    if (rangeEndSearchRange->Collapsed()) {
      // 2.6.1. Assert parsedValue's end item is non-null.
      // (This can only happen for range matches due to the break for exact
      // matches in step 9 of the above loop. If we couldn’t find a valid
      // rangeEnd+suffix pair anywhere in the doc then there’s no possible way
      // to make a match.)
      // ----
      // XXX(:jjaschke): Not too sure about this. If a text directive is only
      // defined by a (prefix +) start element, and the start element happens to
      // be at the end of the document, `rangeEndSearchRange` could be
      // collapsed. Therefore, the loop in section 2.5 does not run. Also,
      // if there would be either an `end` and/or a `suffix`, this would assert
      // instead of returning `nullptr`, indicating that there's no match.
      // Instead, the following would make the algorithm more safe:
      // if there is no end or suffix, the potential match is actually a match,
      // so return it. Otherwise, the text directive can't be in the document,
      // therefore return nullptr.
      if (aTextDirective.end.IsEmpty() && aTextDirective.suffix.IsEmpty()) {
        TEXT_FRAGMENT_LOG(
            "rangeEndSearchRange was collapsed, no end or suffix "
            "present. Returning a match");
        return potentialMatch;
      }
      TEXT_FRAGMENT_LOG(
          "rangeEndSearchRange was collapsed, there is an end or "
          "suffix. There can't be a match.");
      return nullptr;
    }
  }
  // 3. Return null.
  TEXT_FRAGMENT_LOG("Did not find a match.");
  return nullptr;
}

}  // namespace mozilla::dom