File: utils.cc

package info (click to toggle)
chromium 139.0.7258.127-1
  • links: PTS, VCS
  • area: main
  • in suites:
  • size: 6,122,068 kB
  • sloc: cpp: 35,100,771; ansic: 7,163,530; javascript: 4,103,002; python: 1,436,920; asm: 946,517; xml: 746,709; pascal: 187,653; perl: 88,691; sh: 88,436; objc: 79,953; sql: 51,488; cs: 44,583; fortran: 24,137; makefile: 22,147; tcl: 15,277; php: 13,980; yacc: 8,984; ruby: 7,485; awk: 3,720; lisp: 3,096; lex: 1,327; ada: 727; jsp: 228; sed: 36
file content (75 lines) | stat: -rw-r--r-- 2,694 bytes parent folder | download | duplicates (9)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
// Copyright 2020 The Chromium Authors
// Copyright 2014 Blake Embrey (hello@blakeembrey.com)
// Use of this source code is governed by an MIT-style license that can be
// found in the LICENSE file or at https://opensource.org/licenses/MIT.

#include "third_party/liburlpattern/utils.h"

#include <string_view>

namespace liburlpattern {

namespace {

// Characters that receive a backslash prefix when a string is escaped for
// use inside a regular expression (see EscapeRegexpStringAndAppend()).
constexpr std::string_view kRegexpSpecialCharacters(".+*?^${}()[]|/\\");

// Characters that receive a backslash prefix when a string is escaped for
// use inside a pattern string (see EscapePatternStringAndAppend()).
constexpr std::string_view kPatternSpecialCharacters("+*?:{}()\\");

// Appends |input| to the end of |append_target|.  Every character of |input|
// that also occurs in |special_chars| is preceded by a single backslash.
// Existing contents of |append_target| are left in place.
void EscapeStringAndAppendInternal(std::string_view input,
                                   std::string& append_target,
                                   std::string_view special_chars) {
  for (const char ch : input) {
    const bool needs_escape =
        special_chars.find(ch) != std::string_view::npos;
    if (needs_escape) {
      append_target.push_back('\\');
    }
    append_target.push_back(ch);
  }
}

}  // namespace

// Returns the number of characters |input| occupies once every regexp
// special character in it has been given a one-character backslash prefix.
size_t EscapedRegexpStringLength(std::string_view input) {
  size_t escapes_needed = 0;
  for (const char ch : input) {
    if (kRegexpSpecialCharacters.find(ch) != std::string_view::npos) {
      ++escapes_needed;
    }
  }
  // One slot per original character plus one per inserted backslash.
  return input.size() + escapes_needed;
}

// Appends |input| to |append_target|, placing a backslash in front of each
// character listed in kRegexpSpecialCharacters.
void EscapeRegexpStringAndAppend(std::string_view input,
                                 std::string& append_target) {
  EscapeStringAndAppendInternal(input, append_target,
                                kRegexpSpecialCharacters);
}

// Appends |input| to |append_target|, placing a backslash in front of each
// character listed in kPatternSpecialCharacters.
void EscapePatternStringAndAppend(std::string_view input,
                                  std::string& append_target) {
  EscapeStringAndAppendInternal(input, append_target,
                                kPatternSpecialCharacters);
}

// Returns a new string holding |input| with its regexp special characters
// backslash-escaped.
std::string EscapeRegexpString(std::string_view input) {
  std::string escaped;
  // Size the buffer for the escaped form up front, before appending.
  const size_t escaped_length = EscapedRegexpStringLength(input);
  escaped.reserve(escaped_length);
  EscapeRegexpStringAndAppend(input, escaped);
  return escaped;
}

bool IsNameCodepoint(UChar32 c, bool first_codepoint) {
  // Group names are held to the same character restrictions as javascript
  // identifiers.  The logic is derived from v8 at:
  //
  // https://source.chromium.org/chromium/chromium/src/+/master:v8/src/strings/char-predicates.cc;l=17-34;drc=be014256adea1552d4a044ef80616cdab6a7d549
  //
  // We deviate from js identifiers, however, in not supporting the backslash
  // character.  In js identifiers it mainly exists so that escaped unicode
  // sequences can be written in ascii.  The js engine should have taken care
  // of that long before this level of code is reached, so it does not need
  // to be handled here.
  if (!first_codepoint) {
    // Any later codepoint: ID_Continue, '$', '_', or one of U+200C (zero
    // width non-joiner) and U+200D (zero width joiner).
    return u_hasBinaryProperty(c, UCHAR_ID_CONTINUE) || c == '$' ||
           c == '_' || c == 0x200c || c == 0x200d;
  }

  // Leading codepoint: ID_Start, '$', or '_'.
  return u_hasBinaryProperty(c, UCHAR_ID_START) || c == '$' || c == '_';
}

}  // namespace liburlpattern