File: pcre-move-to-pcre2.patch

package info (click to toggle)
watchman 4.9.0-9
  • links: PTS, VCS
  • area: main
  • in suites:
  • size: 9,992 kB
  • sloc: cpp: 27,459; python: 6,538; java: 3,404; php: 3,257; ansic: 2,803; javascript: 1,116; makefile: 671; ruby: 364; sh: 124; xml: 102; lisp: 4
file content (235 lines) | stat: -rw-r--r-- 7,490 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
From: Xavier Deguillard <xavierd@fb.com>
Date: Tue, 26 Jul 2022 13:51:58 -0700
Subject: pcre: move to pcre2

Summary:
The pcre API is no longer maintained. According to pcre.org:

. The older, but still widely deployed PCRE library, originally released in
. 1997, is at version 8.45. This version of PCRE is now at end of life, and is no
. longer being actively maintained. Version 8.45 is expected to be the final
. release of the older PCRE library, and new projects should use PCRE2 instead.

Let's thus migrate to pcre2.

Reviewed By: chadaustin

Differential Revision: D38134776

fbshipit-source-id: 0d2eaeb84013cd03a7426fffa23afe9f48d57cdf

Comment: ported to older watchman by Jeremy Bicha
---
 TARGETS           |  2 +-
 configure.ac      | 18 +++++++--------
 query/pcre.cpp    | 66 ++++++++++++++++++++++++++++++++-----------------------
 watchman_system.h |  5 +++--
 4 files changed, 52 insertions(+), 39 deletions(-)

diff --git a/TARGETS b/TARGETS
index 5b3ddb3..4ec3d25 100644
--- a/TARGETS
+++ b/TARGETS
@@ -191,7 +191,7 @@ cpp_library(
 cpp_library(
     name = "pcre",
     srcs = ["query/pcre.cpp"],
-    compiler_flags = ["-DHAVE_PCRE_H"] + compiler_flags,
+    compiler_flags = ["-DHAVE_PCRE2_H"] + compiler_flags,
     link_whole = True,
     deps = [":headers"],
     external_deps = ["pcre"],
diff --git a/configure.ac b/configure.ac
index 7e809a5..a57deb2 100644
--- a/configure.ac
+++ b/configure.ac
@@ -152,8 +152,8 @@ AM_CONDITIONAL(HAVE_RUBY, [test "x$want_ruby" = "xyes"])
 
 AC_ARG_WITH(pcre, [
   --without-pcre       Don't enable pcre support.
-  --with-pcre=PATH     Enable pcre support.  PATH is location of pcre-config.
-                       Default is to enable and look for pcre-config in your
+  --with-pcre=PATH     Enable pcre support.  PATH is location of pcre2-config.
+                       Default is to enable and look for pcre2-config in your
                        $PATH
 ],[
   pcre_config="$withval"
@@ -163,10 +163,10 @@ AC_ARG_WITH(pcre, [
 
 if test "$pcre_config" != "no" ; then
   if test "$pcre_config" = "yes" -o "$pcre_config" = "check"; then
-    AC_PATH_PROG(PCRE_CONFIG_PATH, pcre-config, false)
+    AC_PATH_PROG(PCRE_CONFIG_PATH, pcre2-config, false)
     dnl If --with-pcre was specified but pcre-config not found, fail hard now.
     if test "$pcre_config" = "yes" -a "$PCRE_CONFIG_PATH" = "false"; then
-      AC_MSG_FAILURE([--with-pcre was given, but pcre-config not found in PATH])
+      AC_MSG_FAILURE([--with-pcre was given, but pcre2-config not found in PATH])
     fi
   else
     PCRE_CONFIG_PATH="$pcre_config"
@@ -181,18 +181,18 @@ if test "$pcre_config" != "no" ; then
   fi
 
   if test -n "$pcre_version"; then
-    PCRE_LIBS=`$PCRE_CONFIG_PATH --libs`
+    PCRE_LIBS=`$PCRE_CONFIG_PATH --libs8`
     LIBS="$LIBS $PCRE_LIBS"
     PCRE_CFLAGS=`$PCRE_CONFIG_PATH --cflags`
     CFLAGS="$CFLAGS $PCRE_CFLAGS"
     CPPFLAGS="$CPPFLAGS $PCRE_CFLAGS"
-    AC_CHECK_HEADERS(pcre.h, [], [
+    AC_CHECK_HEADERS(pcre2.h, [], [
       if test "$pcre_config" != "check"; then
          AC_MSG_FAILURE([--with-pcre was given, but pcre not found:
-  pcre-config --libs=$PCRE_LIBS
-  pcre-config --cflags=$PCRE_CFLAGS])
+  pcre2-config --libs8=$PCRE_LIBS
+  pcre2-config --cflags=$PCRE_CFLAGS])
       fi
-    ])
+    ], [#define PCRE2_CODE_UNIT_WIDTH 8])
   elif test "$pcre_config" != "check"; then
     AC_MSG_FAILURE([$PCRE_CONFIG_PATH failed to run, could not check for PCRE])
   fi
diff --git a/query/pcre.cpp b/query/pcre.cpp
index 60c201f..1329d3b 100644
--- a/query/pcre.cpp
+++ b/query/pcre.cpp
@@ -5,25 +5,25 @@
 
 #include "make_unique.h"
 
-#ifdef HAVE_PCRE_H
+#ifdef HAVE_PCRE2_H
 
 using watchman::CaseSensitivity;
 
 class PcreExpr : public QueryExpr {
-  pcre *re;
-  pcre_extra *extra;
+  pcre2_code *re;
+  pcre2_match_data* matchData;
   bool wholename;
 
  public:
-  explicit PcreExpr(pcre* re, pcre_extra* extra, bool wholename)
-      : re(re), extra(extra), wholename(wholename) {}
+  explicit PcreExpr(pcre2_code* re, pcre2_match_data* matchData, bool wholename)
+      : re(re), matchData(matchData), wholename(wholename) {}
 
   ~PcreExpr() override {
     if (re) {
-      pcre_free(re);
+      pcre2_code_free(re);
     }
-    if (extra) {
-      pcre_free(extra);
+    if (matchData) {
+      pcre2_match_data_free(matchData);
     }
   }
 
@@ -37,16 +37,17 @@ class PcreExpr : public QueryExpr {
       str = file->baseName();
     }
 
-    rc = pcre_exec(re, extra, str.data(), str.size(), 0, 0, nullptr, 0);
-
-    if (rc == PCRE_ERROR_NOMATCH) {
-      return false;
-    }
-    if (rc >= 0) {
-      return true;
-    }
-    // An error.  It's not actionable here
-    return false;
+    rc = pcre2_match(
+        re,
+        reinterpret_cast<const unsigned char*>(str.data()),
+        str.size(),
+        0,
+        0,
+        matchData,
+        nullptr);
+    // Errors are either PCRE2_ERROR_NOMATCH or non-actionable. Thus only match
+    // when we get a non-negative return value.
+    return rc >= 0;
   }
 
   static std::unique_ptr<QueryExpr>
@@ -54,9 +55,7 @@ class PcreExpr : public QueryExpr {
     const char *ignore, *pattern, *scope = "basename";
     const char *which =
         caseSensitive == CaseSensitivity::CaseInSensitive ? "ipcre" : "pcre";
-    pcre* re;
-    const char* errptr = nullptr;
-    int erroff = 0;
+    size_t erroff = 0;
     int errcode = 0;
 
     if (json_unpack(term, "[s,s,s]", &ignore, &pattern, &scope) != 0 &&
@@ -70,29 +69,42 @@ class PcreExpr : public QueryExpr {
           "Invalid scope '", scope, "' for ", which, " expression"));
     }
 
-    re = pcre_compile2(
-        pattern,
-        caseSensitive == CaseSensitivity::CaseInSensitive ? PCRE_CASELESS : 0,
+    auto re = pcre2_compile(
+        reinterpret_cast<const unsigned char*>(pattern),
+        PCRE2_ZERO_TERMINATED,
+        caseSensitive == CaseSensitivity::CaseInSensitive ? PCRE2_CASELESS : 0,
         &errcode,
-        &errptr,
         &erroff,
         nullptr);
     if (!re) {
+      // From PCRE2 documentation:
+      // https://www.pcre.org/current/doc/html/pcre2api.html#SEC32: "None of the
+      // messages are very long; a buffer size of 120 code units is ample"
+      PCRE2_UCHAR buffer[120];
+      static_assert(
+          sizeof(char) == sizeof(PCRE2_UCHAR),
+          "Watchman uses the 8-bit PCRE2 library");
+      pcre2_get_error_message(errcode, buffer, 120);
       throw QueryParseError(watchman::to<std::string>(
           "invalid ",
           which,
           ": code ",
           errcode,
           " ",
-          errptr,
+          reinterpret_cast<const char*>(&buffer),
           " at offset ",
           erroff,
           " in ",
           pattern));
     }
 
+    auto matchData = pcre2_match_data_create_from_pattern(re, nullptr);
+    if (!matchData) {
+      throw std::bad_alloc();
+    }
+
     return watchman::make_unique<PcreExpr>(
-        re, pcre_study(re, 0, &errptr), !strcmp(scope, "wholename"));
+        re, matchData, !strcmp(scope, "wholename"));
   }
   static std::unique_ptr<QueryExpr> parsePcre(
       w_query* query,
diff --git a/watchman_system.h b/watchman_system.h
index 6235d01..baeb636 100644
--- a/watchman_system.h
+++ b/watchman_system.h
@@ -64,8 +64,9 @@
 #include <poll.h>
 #include <sys/wait.h>
 #endif
-#ifdef HAVE_PCRE_H
-# include <pcre.h>
+#ifdef HAVE_PCRE2_H
+#define PCRE2_CODE_UNIT_WIDTH 8
+# include <pcre2.h>
 #endif
 #ifdef HAVE_EXECINFO_H
 # include <execinfo.h>