| 12
 3
 4
 5
 6
 7
 8
 9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
 100
 101
 102
 103
 104
 105
 106
 107
 108
 109
 110
 111
 112
 113
 114
 115
 116
 117
 118
 119
 120
 121
 122
 123
 124
 125
 126
 127
 128
 129
 130
 131
 132
 133
 134
 135
 136
 137
 138
 139
 140
 141
 142
 143
 144
 145
 146
 147
 148
 149
 150
 151
 152
 153
 154
 155
 156
 157
 158
 159
 160
 161
 162
 163
 164
 165
 166
 167
 168
 169
 170
 171
 172
 173
 174
 175
 176
 177
 178
 179
 180
 181
 182
 183
 184
 185
 186
 187
 188
 189
 190
 191
 192
 193
 194
 195
 196
 197
 198
 199
 200
 201
 202
 203
 204
 205
 206
 207
 208
 209
 210
 211
 212
 213
 214
 215
 216
 217
 218
 219
 220
 221
 222
 223
 224
 225
 226
 227
 228
 229
 230
 231
 232
 233
 234
 235
 236
 237
 238
 239
 240
 241
 242
 243
 244
 245
 246
 247
 248
 249
 
 | commit cfcf1e6e9c4f8a2404810d8d1d90e6d1eaa0abdd
Author: Romain Francoise <romain@rfr.io>
Date:   Sat Nov 20 23:28:15 2021 +0100
    Port to PCRE2 API and enable JIT compilation
    
    The original PCRE API provided on most systems by libpcre3 is no longer
    maintained upstream and is superseded by the new PCRE2 API, which was
    first released in 2015. pcre3 will be removed from Debian in 2023, as
    noted in this bug report: https://bugs.debian.org/1000080
    
    This commit replaces the existing PCRE implementation with a new one
    using PCRE2, which is quite similar. One benefit is that PCRE2 provides
    a JIT compiler which can replace the interpretive regular expression
    evaluation code with native machine code on most modern platforms:
    https://pcre.org/current/doc/html/pcre2jit.html
    
    Depending on the length and complexity of the pattern used, enabling JIT
    compilation makes Ngrep 50x to 150x faster, as measured in quiet mode on a
    multi-gigabyte PCAP file stored on tmpfs.
diff --git a/configure.in b/configure.in
index dbef39bcf5..0806a62066 100644
--- a/configure.in
+++ b/configure.in
@@ -141,16 +141,16 @@ dnl
 REGEX_DIR=''
 REGEX_OBJS=''
 
-AC_ARG_ENABLE(pcre,
-[  --enable-pcre           use PCRE instead of GNU regex (default GNU)],
-[ use_pcre="$enableval" ],
-[ use_pcre="no" ])
-
-if test $use_pcre = yes; then
-  USE_PCRE="1"
-  EXTRA_LIBS="$EXTRA_LIBS -lpcre"
+AC_ARG_ENABLE(pcre2,
+[  --enable-pcre2           use PCRE2 instead of GNU regex (default GNU)],
+[ use_pcre2="$enableval" ],
+[ use_pcre2="no" ])
+
+if test $use_pcre2 = yes; then
+  USE_PCRE2="1"
+  EXTRA_LIBS="$EXTRA_LIBS -lpcre2-8"
 else
-  USE_PCRE="0"
+  USE_PCRE2="0"
 
   AC_MSG_RESULT
   AC_MSG_RESULT(Configuring GNU Regular Expression library ...)
@@ -476,7 +476,7 @@ dnl
 AC_DEFINE_UNQUOTED(USE_PCAP_RESTART,          $USE_PCAP_RESTART,          [whether to call the BPF lexer restart function between multiple BPF filter compilation attempts (default no)])
 AC_DEFINE_UNQUOTED(PCAP_RESTART_FUNC,         $PCAP_RESTART_FUNC,         [routine used for restarting the BPF lexer])
 
-AC_DEFINE_UNQUOTED(USE_PCRE,                  $USE_PCRE,                  [whether to use PCRE (default GNU Regex)])
+AC_DEFINE_UNQUOTED(USE_PCRE2,                 $USE_PCRE2,                 [whether to use PCRE2 (default GNU Regex)])
 AC_DEFINE_UNQUOTED(USE_IPv6,                  $USE_IPv6,                  [whether to use IPv6 (default off)])
 AC_DEFINE_UNQUOTED(USE_TCPKILL,               $USE_TCPKILL,               [whether to enable tcpkill functionality (default off)])
 AC_DEFINE_UNQUOTED(USE_VLAN_HACK,             $USE_VLAN_HACK,             [whether to automatically include VLAN frames (default on)])
@@ -524,8 +524,8 @@ else
     AC_MSG_RESULT(CONFIG: privilege dropping DISABLED)
 fi
 
-if test "$USE_PCRE" = "1"; then
-    AC_MSG_RESULT(CONFIG: using PCRE regex library)
+if test "$USE_PCRE2" = "1"; then
+    AC_MSG_RESULT(CONFIG: using PCRE2 regex library)
 else
     AC_MSG_RESULT(CONFIG: using GNU regex library)
 fi
diff --git a/ngrep.c b/ngrep.c
index 3df9389c01..dcf05551ad 100644
--- a/ngrep.c
+++ b/ngrep.c
@@ -91,8 +91,9 @@
 #include <netinet/icmp6.h>
 #endif
 
-#if USE_PCRE
-#include <pcre.h>
+#if USE_PCRE2
+#define PCRE2_CODE_UNIT_WIDTH 8
+#include <pcre2.h>
 #else
 #include <regex.h>
 #endif
@@ -128,12 +129,14 @@ char nonprint_char = '.';
  * GNU Regex/PCRE
  */
 
-#if USE_PCRE
-int32_t err_offset;
-char *re_err = NULL;
+#if USE_PCRE2
+PCRE2_SIZE err_offset;
+int re_err;
 
-pcre *pattern = NULL;
-pcre_extra *pattern_extra = NULL;
+pcre2_code *re;
+pcre2_match_data *pcre2_md;
+PCRE2_SPTR pattern;
+uint32_t pcre2_jit_on = 0;
 #else
 const char *re_err = NULL;
 
@@ -189,6 +192,7 @@ uint32_t ws_row, ws_col = 80, ws_col_forced = 0;
 
 int main(int argc, char **argv) {
     int32_t c;
+    const char *extra = "";
 
     signal(SIGINT,   clean_exit);
     signal(SIGABRT,  clean_exit);
@@ -394,8 +398,12 @@ int main(int argc, char **argv) {
         if (setup_matcher())
             clean_exit(2);
 
+#if USE_PCRE2
+        if (pcre2_jit_on)
+            extra = " (JIT)";
+#endif
         if (quiet < 2 && strlen(match_data))
-            printf("%smatch: %s%s\n", invert_match?"don't ":"",
+            printf("%smatch%s: %s%s\n", invert_match?"don't ":"", extra,
                    (bin_data && !strchr(match_data, 'x'))?"0x":"", match_data);
 
         if (re_match_word) free(match_data);
@@ -631,14 +639,14 @@ int setup_matcher(void) {
 
     } else {
 
-#if USE_PCRE
-        uint32_t pcre_options = PCRE_UNGREEDY;
+#if USE_PCRE2
+        uint32_t pcre_options = PCRE2_UNGREEDY;
 
         if (re_ignore_case)
-            pcre_options |= PCRE_CASELESS;
+            pcre_options |= PCRE2_CASELESS;
 
         if (re_multiline_match)
-            pcre_options |= PCRE_DOTALL;
+            pcre_options |= PCRE2_DOTALL;
 #else
         re_syntax_options = RE_CHAR_CLASSES | RE_NO_BK_PARENS | RE_NO_BK_VBAR |
             RE_CONTEXT_INDEP_ANCHORS | RE_CONTEXT_INDEP_OPS;
@@ -673,15 +681,36 @@ int setup_matcher(void) {
             match_data = word_regex;
         }
 
-#if USE_PCRE
-        pattern = pcre_compile(match_data, pcre_options, (const char **)&re_err, &err_offset, 0);
+#if USE_PCRE2
+        re = pcre2_compile((PCRE2_SPTR8)match_data, PCRE2_ZERO_TERMINATED,
+            pcre_options, &re_err, &err_offset, NULL);
+        if (!re) {
+            PCRE2_UCHAR buffer[256];
+            pcre2_get_error_message(re_err, buffer, sizeof(buffer));
+            fprintf(stderr, "regex compile failed: %s (offset: %zd)\n", buffer,
+                err_offset);
+            return 1;
+        }
 
-        if (!pattern) {
-            fprintf(stderr, "compile failed: %s\n", re_err);
+        pcre2_md = pcre2_match_data_create_from_pattern(re, NULL);
+        if (!pcre2_md) {
+            fprintf(stderr, "unable to alloc pcre2 match data\n");
             return 1;
         }
 
-        pattern_extra = pcre_study(pattern, 0, (const char **)&re_err);
+        pcre2_config(PCRE2_CONFIG_JIT, &pcre2_jit_on);
+        if (pcre2_jit_on) {
+            int rc;
+            size_t jitsz;
+
+            if (pcre2_jit_compile(re, PCRE2_JIT_COMPLETE) != 0) {
+                fprintf(stderr, "unable to JIT-compile pcre2 regular expression\n");
+                return 1;
+            }
+            rc = pcre2_pattern_info(re, PCRE2_INFO_JITSIZE, &jitsz);
+            if (rc || jitsz == 0)
+                pcre2_jit_on = 0;
+        }
 #else
         re_err = re_compile_pattern(match_data, strlen(match_data), &pattern);
         if (re_err) {
@@ -990,24 +1019,29 @@ void dump_packet(struct pcap_pkthdr *h, u_char *p, uint8_t proto, unsigned char
 }
 
 int8_t re_match_func(unsigned char *data, uint32_t len, uint16_t *mindex, uint16_t *msize) {
-#if USE_PCRE
-
-    static int sub[2];
-    switch(pcre_exec(pattern, 0, (char const *)data, (int32_t)len, 0, 0, 0, 0)) {
-        case PCRE_ERROR_NULL:
-        case PCRE_ERROR_BADOPTION:
-        case PCRE_ERROR_BADMAGIC:
-        case PCRE_ERROR_UNKNOWN_NODE:
-        case PCRE_ERROR_NOMEMORY:
-            perror("she's dead, jim\n");
-            clean_exit(2);
+#if USE_PCRE2
+    int rc;
+    PCRE2_SIZE *ovector;
+    PCRE2_UCHAR errbuf[256];
 
-        case PCRE_ERROR_NOMATCH:
-            return 0;
+    if (pcre2_jit_on)
+        rc = pcre2_jit_match(re, data, len, 0, 0, pcre2_md, NULL);
+    else
+        rc = pcre2_match(re, data, len, 0, 0, pcre2_md, NULL);
 
-        default:
-            *mindex = sub[0];
-            *msize  = sub[1] - sub[0];
+    if (rc < 0) {
+        switch (rc) {
+            case PCRE2_ERROR_NOMATCH:
+                return 0;
+            default:
+                pcre2_get_error_message(rc, errbuf, sizeof(errbuf));
+                fprintf(stderr, "she's dead, jim: %s (error %d)\n", errbuf, rc);
+                clean_exit(2);
+        }
+    } else {
+        ovector = pcre2_get_ovector_pointer(pcre2_md);
+        *mindex = ovector[0];
+        *msize = ovector[1] - ovector[0];
     }
 #else
 
@@ -1479,9 +1513,9 @@ void clean_exit(int32_t sig) {
     if (quiet < 1 && sig >= 0)
         printf("exit\n");
 
-#if USE_PCRE
-    if (pattern)       pcre_free(pattern);
-    if (pattern_extra) pcre_free(pattern_extra);
+#if USE_PCRE2
+    if (re)       pcre2_code_free(re);
+    if (pcre2_md) pcre2_match_data_free(pcre2_md);
 #else
     if (pattern.translate) free(pattern.translate);
     if (pattern.fastmap)   free(pattern.fastmap);
 |