File: pcre.cpp

package info (click to toggle)
watchman 4.9.0-9
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 9,992 kB
  • sloc: cpp: 27,459; python: 6,538; java: 3,404; php: 3,257; ansic: 2,803; javascript: 1,116; makefile: 671; ruby: 364; sh: 124; xml: 102; lisp: 4
file content (126 lines) | stat: -rw-r--r-- 3,554 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
/* Copyright 2013-present Facebook, Inc.
 * Licensed under the Apache License, Version 2.0 */

#include "watchman.h"

#include "make_unique.h"

#ifdef HAVE_PCRE2_H

using watchman::CaseSensitivity;

class PcreExpr : public QueryExpr {
  pcre2_code *re;
  pcre2_match_data* matchData;
  bool wholename;

 public:
  explicit PcreExpr(pcre2_code* re, pcre2_match_data* matchData, bool wholename)
      : re(re), matchData(matchData), wholename(wholename) {}

  ~PcreExpr() override {
    if (re) {
      pcre2_code_free(re);
    }
    if (matchData) {
      pcre2_match_data_free(matchData);
    }
  }

  bool evaluate(struct w_query_ctx* ctx, const FileResult* file) override {
    w_string_piece str;
    int rc;

    if (wholename) {
      str = w_query_ctx_get_wholename(ctx);
    } else {
      str = file->baseName();
    }

    rc = pcre2_match(
        re,
        reinterpret_cast<const unsigned char*>(str.data()),
        str.size(),
        0,
        0,
        matchData,
        nullptr);
    // Errors are either PCRE2_ERROR_NOMATCH or non actionable. Thus only match
    // when we get a positive return value.
    return rc >= 0;
  }

  static std::unique_ptr<QueryExpr>
  parse(w_query*, const json_ref& term, CaseSensitivity caseSensitive) {
    const char *ignore, *pattern, *scope = "basename";
    const char *which =
        caseSensitive == CaseSensitivity::CaseInSensitive ? "ipcre" : "pcre";
    size_t erroff = 0;
    int errcode = 0;

    if (json_unpack(term, "[s,s,s]", &ignore, &pattern, &scope) != 0 &&
        json_unpack(term, "[s,s]", &ignore, &pattern) != 0) {
      throw QueryParseError(watchman::to<std::string>(
          "Expected [\"", which, "\", \"pattern\", \"scope\"?]"));
    }

    if (strcmp(scope, "basename") && strcmp(scope, "wholename")) {
      throw QueryParseError(watchman::to<std::string>(
          "Invalid scope '", scope, "' for ", which, " expression"));
    }

    auto re = pcre2_compile(
        reinterpret_cast<const unsigned char*>(pattern),
        PCRE2_ZERO_TERMINATED,
        caseSensitive == CaseSensitivity::CaseInSensitive ? PCRE2_CASELESS : 0,
        &errcode,
        &erroff,
        nullptr);
    if (!re) {
      // From PCRE2 documentation:
      // https://www.pcre.org/current/doc/html/pcre2api.html#SEC32: "None of the
      // messages are very long; a buffer size of 120 code units is ample"
      PCRE2_UCHAR buffer[120];
      static_assert(
          sizeof(char) == sizeof(PCRE2_UCHAR),
          "Watchman uses the 8-bit PCRE2 library");
      pcre2_get_error_message(errcode, buffer, 120);
      throw QueryParseError(watchman::to<std::string>(
          "invalid ",
          which,
          ": code ",
          errcode,
          " ",
          reinterpret_cast<const char*>(&buffer),
          " at offset ",
          erroff,
          " in ",
          pattern));
    }

    auto matchData = pcre2_match_data_create_from_pattern(re, nullptr);
    if (!matchData) {
      throw std::bad_alloc();
    }

    return watchman::make_unique<PcreExpr>(
        re, matchData, !strcmp(scope, "wholename"));
  }
  static std::unique_ptr<QueryExpr> parsePcre(
      w_query* query,
      const json_ref& term) {
    return parse(query, term, query->case_sensitive);
  }
  static std::unique_ptr<QueryExpr> parseIPcre(
      w_query* query,
      const json_ref& term) {
    return parse(query, term, CaseSensitivity::CaseInSensitive);
  }
};
// Associate the "pcre" and "ipcre" term names with their parser entry points.
// NOTE(review): W_TERM_PARSER is defined outside this file; presumably it
// registers each parser with the query term table -- confirm.
W_TERM_PARSER("pcre", PcreExpr::parsePcre)
W_TERM_PARSER("ipcre", PcreExpr::parseIPcre)

#endif

/* vim:ts=2:sw=2:et:
 */