Description: Port to PCRE2.
Bug-Debian: https://bugs.debian.org/999931
Author: Yavor Doganov <yavor@gnu.org>
Forwarded: no
Last-Update: 2024-03-04
---

--- a/libsrc/Wi/Makefile.am
+++ b/libsrc/Wi/Makefile.am
@@ -563,7 +563,7 @@ libwi_la_SOURCES += \
 	$(libwi_base_la_sources)
 
 libwi_la_CFLAGS  = $(libwi_base_la_cflags)
-libwi_la_LDLAGS  = -static -lminizip -lpcre
+libwi_la_LDLAGS  = -static -lminizip -lpcre2-8
 
 #KUBL_UNIV_FILES_ODBC
 libwi_odbc_la_SOURCES += \
--- a/libsrc/Wi/bif_regexp.c
+++ b/libsrc/Wi/bif_regexp.c
@@ -31,7 +31,8 @@
 
 // Debian maintainer: replaced by external PCRE
 // #include "util/pcrelib/pcre.h"
-#include "pcre.h"
+#define PCRE2_CODE_UNIT_WIDTH 8
+#include <pcre2.h>
 
 /*
    typedef struct rx_query_s {
@@ -65,16 +66,17 @@ regexp_key_t;
 typedef struct compiled_regexp_s
 {
   int refctr;
-  pcre *code;
-  pcre_extra *code_x;
+  pcre2_code *code;
 }
 compiled_regexp_t;
 
 id_hash_t *compiled_regexps;
 
-int32 c_pcre_match_limit_recursion = 500;
-int32 c_pcre_match_limit = 100000;
-int32 pcre_max_cache_sz = 20000;
+static pcre2_match_context *match_ctxt = NULL;
+
+uint32 c_pcre_match_limit_recursion = 500;
+uint32 c_pcre_match_limit = 100000;
+uint32 pcre_max_cache_sz = 20000;
 int32 pcre_rnd_seed;
 
 id_hashed_key_t
@@ -96,6 +98,23 @@ regexp_key_hashcmp (char *x, char *y)
 }
 
 void
+create_match_context (void)
+{
+  if (NULL != match_ctxt)
+    return;
+
+  match_ctxt = pcre2_match_context_create (NULL);
+  if (c_pcre_match_limit > 0)
+    {
+      pcre2_set_match_limit (match_ctxt, c_pcre_match_limit);
+    }
+  if (c_pcre_match_limit_recursion > 0)
+    {
+      pcre2_set_depth_limit (match_ctxt, c_pcre_match_limit_recursion);
+    }
+}
+
+void
 release_compiled_regexp (id_hash_t *c_r, compiled_regexp_t *data)
 {
   int delete_data;
@@ -111,9 +130,7 @@ release_compiled_regexp (id_hash_t *c_r,
   if (!delete_data)
     return;
   if (NULL != data->code)
-    pcre_free (data->code);
-  if (NULL != data->code_x)
-    pcre_free (data->code_x);
+    pcre2_code_free (data->code);
   dk_free (data, sizeof (compiled_regexp_t));
 }
 
@@ -136,10 +153,11 @@ pcre_cache_check (id_hash_t * ht)
 }
 
 static compiled_regexp_t *
-get_compiled_regexp (id_hash_t *c_r, const char *pattern, int options, caddr_t *err_ret)
+get_compiled_regexp (id_hash_t *c_r, const char *pattern, uint32_t options, caddr_t *err_ret)
 {
-  const char *error = NULL;
-  int erroff;
+  int err;
+  PCRE2_UCHAR error[120];
+  PCRE2_SIZE erroff;
   regexp_key_t key;
   compiled_regexp_t **val = NULL;
   compiled_regexp_t tmp, *new_val;
@@ -155,46 +173,18 @@ get_compiled_regexp (id_hash_t *c_r, con
     }
   HT_UNLOCK (c_r);
   dbg_printf (("regex compiling (%s) with options %x ...\n", pattern, options));
-  tmp.code = pcre_compile (pattern, options, &error, &erroff, 0);
+  tmp.code = pcre2_compile ((PCRE2_SPTR) pattern, strlen (pattern),
+                            options, &err, &erroff, NULL);
   if (NULL == tmp.code)
     {
-      if (error)
-        err_ret[0] = srv_make_new_error ("2201B",
-            "SR098", "regexp error at \'%s\' column %d (%s)", pattern, erroff, error);
-      else
-        err_ret[0] = srv_make_new_error ("2201B",
-            "SR098", "regexp error at \'%s\' column %d", pattern, erroff);
+      pcre2_get_error_message(err, error, sizeof (error));
+      err_ret[0] = srv_make_new_error ("2201B",
+          "SR098", "regexp error at \'%s\' column %zu (%s)", pattern, erroff, error);
       return NULL;
     }
-  tmp.code_x = pcre_study (tmp.code, options, &error);
-#ifdef DEBUG
-  if (!tmp.code_x)
-    dbg_printf (("***warning RX100: regexp warning: extra regular expression compiling failed\n"));
-#endif
-  if (!tmp.code_x)
-     {
-       tmp.code_x = pcre_malloc (sizeof (pcre_extra));
-       if (tmp.code_x)
-         memset (tmp.code_x, 0, sizeof (pcre_extra));
-     }
-#ifdef PCRE_EXTRA_MATCH_LIMIT
-  if (c_pcre_match_limit > 0)
-    {
-      tmp.code_x->flags |= PCRE_EXTRA_MATCH_LIMIT;
-      tmp.code_x->match_limit = c_pcre_match_limit;
-    }
-#endif
-#ifdef PCRE_EXTRA_MATCH_LIMIT_RECURSION
-  if (c_pcre_match_limit_recursion > 0)
-    {
-      tmp.code_x->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
-      tmp.code_x->match_limit_recursion = c_pcre_match_limit_recursion;
-    }
-#endif
   key.orig_strg = box_dv_short_string (pattern);
   new_val = (compiled_regexp_t *)dk_alloc (sizeof (compiled_regexp_t));
   new_val->code = tmp.code;
-  new_val->code_x = tmp.code_x;
   new_val->refctr = 1;
   HT_WRLOCK (c_r);
   pcre_cache_check (c_r);
@@ -301,18 +291,18 @@ bif_regexp_str_arg (caddr_t * qst, state
 }
 
 
-static int
+static uint32_t
 regexp_optchars_to_bits (const char *strg)
 {
-  int res = 0;
+  uint32_t res = 0;
   const char *tail;
   for (tail = strg; '\0' != tail[0]; tail++)
     {
       switch (tail[0])
         {
-        case 'i': case 'I': res |= PCRE_CASELESS; break;
-        case 'm': case 'M': res |= PCRE_MULTILINE; break;
-        case 's': case 'S': res |= PCRE_DOTALL; break;
+        case 'i': case 'I': res |= PCRE2_CASELESS; break;
+        case 'm': case 'M': res |= PCRE2_MULTILINE; break;
+        case 's': case 'S': res |= PCRE2_DOTALL; break;
 /*
 #define PCRE_EXTENDED           0x0008
 #define PCRE_ANCHORED           0x0010
@@ -323,7 +313,7 @@ regexp_optchars_to_bits (const char *str
 #define PCRE_UNGREEDY           0x0200
 #define PCRE_NOTEMPTY           0x0400
         */
-        case 'u': case 'U': res |= PCRE_UTF8; break;
+        case 'u': case 'U': res |= PCRE2_UTF; break;
         /*
 #define PCRE_NO_AUTO_CAPTURE    0x1000
         */
@@ -337,15 +327,16 @@ static caddr_t
 bif_regexp_match (caddr_t * qst, caddr_t * err_ret, state_slot_t ** args)
 {
   int utf8_mode;
-  int str_len;
+  PCRE2_SIZE str_len;
   compiled_regexp_t *cd_info = NULL;
   caddr_t p_to_free = NULL, str_to_free = NULL;
   char *pattern;
   char *str;
-  int c_opts = 0, r_opts = 0;
+  uint32_t c_opts = 0, r_opts = 0;
   caddr_t ret_str = NULL;
   long replace_the_instr = 0;
-  int offvect[NOFFSETS];
+  PCRE2_SIZE *offvect;
+  pcre2_match_data *md = NULL;
   int result;
 
   utf8_mode = 0;
@@ -363,7 +354,7 @@ bif_regexp_match (caddr_t * qst, caddr_t
   if (*err_ret) goto done;
 
   if (utf8_mode)
-    c_opts |= PCRE_UTF8;
+    c_opts |= PCRE2_UTF;
 
   if (!pattern || !str)
     goto done;
@@ -372,10 +363,14 @@ bif_regexp_match (caddr_t * qst, caddr_t
   if (err_ret[0])
     goto done;
 
-  str_len = (int) strlen (str);
-  result = pcre_exec (cd_info->code, cd_info->code_x, str, str_len, 0, r_opts, offvect, NOFFSETS);
+  str_len = strlen (str);
+  create_match_context ();
+  md = pcre2_match_data_create (NOFFSETS, NULL);
+  result = pcre2_match (cd_info->code, (PCRE2_SPTR) str, str_len, 0, r_opts,
+                        md, match_ctxt);
   if (result >= 0)
     {
+      offvect = pcre2_get_ovector_pointer (md);
       ret_str = dk_alloc_box (offvect[1] - offvect[0] + 1, DV_SHORT_STRING);
       strncpy (ret_str, str + offvect[0], offvect[1] - offvect[0]);
       ret_str[offvect[1] - offvect[0]] = 0;
@@ -416,6 +411,7 @@ bif_regexp_match (caddr_t * qst, caddr_t
 
 done:
   release_compiled_regexp (compiled_regexps, cd_info);
+  pcre2_match_data_free (md);
   if (*err_ret)
     dk_free_box (ret_str);
   dk_free_tree (p_to_free);
@@ -427,15 +423,15 @@ static caddr_t
 bif_rdf_regex_impl (caddr_t * qst, caddr_t * err_ret, state_slot_t ** args)
 {
   int utf8_mode = 1;
-  int str_len;
+  PCRE2_SIZE str_len;
   compiled_regexp_t *cd_info = NULL;
   caddr_t p_to_free = NULL, str_to_free = NULL;
   char *pattern;
   char *str;
-  int c_opts = 0, r_opts = 0;
+  uint32_t c_opts = 0, r_opts = 0;
   int result = -1;
   caddr_t err = NULL;
-  int offvect[NOFFSETS];
+  pcre2_match_data *md = NULL;
   switch ((BOX_ELEMENTS (args)))
     {
     default:
@@ -448,7 +444,7 @@ bif_rdf_regex_impl (caddr_t * qst, caddr
   if (err) goto done;
 
   if (utf8_mode)
-    c_opts |= PCRE_UTF8;
+    c_opts |= PCRE2_UTF;
 
   if (!pattern || !str)
     goto done;
@@ -456,11 +452,15 @@ bif_rdf_regex_impl (caddr_t * qst, caddr
   cd_info = get_compiled_regexp (compiled_regexps, pattern, c_opts, err_ret);
   if (err_ret[0])
     goto done;
-  str_len = (int) strlen (str);
-  result = pcre_exec (cd_info->code, cd_info->code_x, str, str_len, 0, r_opts, offvect, NOFFSETS);
+  str_len = strlen (str);
+  create_match_context ();
+  md = pcre2_match_data_create (NOFFSETS, NULL);
+  result = pcre2_match (cd_info->code, (PCRE2_SPTR) str, str_len, 0, r_opts,
+                        md, match_ctxt);
 
 done:
   release_compiled_regexp (compiled_regexps, cd_info);
+  pcre2_match_data_free (md);
   if (err)
     dk_free_tree (err);
   dk_free_tree (p_to_free);
@@ -476,13 +476,14 @@ bif_regexp_substr (caddr_t * qst, caddr_
   char *pattern;
   char *str;
   int offset;
-  int res_len;
+  PCRE2_SIZE res_len;
   compiled_regexp_t *cd_info = NULL;
-  int c_opts = 0, r_opts = 0;
+  uint32_t c_opts = 0, r_opts = 0;
   caddr_t p_to_free = NULL, str_to_free = NULL;
   caddr_t ret_str = NULL;
   int result;
-  int offvect[NOFFSETS];
+  PCRE2_SIZE *offvect;
+  pcre2_match_data *md = NULL;
 
   utf8_mode = 0;
   pattern = bif_regexp_str_arg (qst, args, 0, "regexp_substr", REGEXP_BF, &utf8_mode, &p_to_free, err_ret);
@@ -503,14 +504,20 @@ bif_regexp_substr (caddr_t * qst, caddr_
   if (NULL != err_ret[0])
     goto done;
 
-  res_len = (int) strlen (str);
-  result = pcre_exec (cd_info->code, cd_info->code_x, str, res_len, 0, r_opts, offvect, NOFFSETS);
+  res_len = strlen (str);
+  create_match_context ();
+  md = pcre2_match_data_create (NOFFSETS, NULL);
+  result = pcre2_match (cd_info->code, (PCRE2_SPTR) str, res_len, 0, r_opts,
+                        md, match_ctxt);
   if (result > 0)
     {
+      PCRE2_UCHAR *substr;
+
+      offvect = pcre2_get_ovector_pointer (md);
       int offs = offset*2, rc;
       int ret_strlen = (offset < result && offset >= 0 ? (offvect[offs+1] - offvect[offs]) : res_len);
       ret_str = dk_alloc_box (ret_strlen + 1, DV_SHORT_STRING);
-      rc = pcre_copy_substring (str, offvect, result, offset, ret_str, res_len + 1);
+      rc = pcre2_substring_get_bynumber (md, offset, &substr, &res_len);
       if (rc < 0)
         {
           *err_ret = srv_make_new_error ("2201B", "SR097",
@@ -521,15 +528,21 @@ bif_regexp_substr (caddr_t * qst, caddr_
         {
           if (utf8_mode)
             {
-              caddr_t wide_ret = box_utf8_as_wide_char (ret_str, NULL, ret_strlen, 0);
+              caddr_t wide_ret = box_utf8_as_wide_char ((ccaddr_t) substr, NULL, ret_strlen, 0);
               dk_free_box (ret_str);
               ret_str = wide_ret;
+              pcre2_substring_free (substr);
+            }
+          else
+            {
+              ret_str = (caddr_t) substr;
             }
         }
     }
 
 done:
   release_compiled_regexp (compiled_regexps, cd_info);
+  pcre2_match_data_free (md);
   if (*err_ret)
     dk_free_box (ret_str);
   dk_free_tree (p_to_free);
@@ -538,10 +551,10 @@ done:
 }
 
 ptrlong *
-regexp_offvect_to_array_of_long (utf8char *str, int *offvect, int result, int utf8_mode)
+regexp_offvect_to_array_of_long (utf8char *str, PCRE2_SIZE *offvect, int result, int utf8_mode)
 {
   int i, idx_to_fill;
-  int prev_ofs, ofs, prev_wide_len;
+  PCRE2_SIZE prev_ofs, ofs, prev_wide_len;
   dk_set_t skipped_i = NULL;
   ptrlong *ret_vec;
   virt_mbstate_t mb;
@@ -576,7 +589,7 @@ However all out-of-order E-s form a prop
       int next_nonnegative_ofs_i = i + 1;
       while (next_nonnegative_ofs_i < result)
         {
-          if (0 > offvect[next_nonnegative_ofs_i])
+          if (PCRE2_UNSET == offvect[next_nonnegative_ofs_i])
             {
               next_nonnegative_ofs_i++;
               continue;
@@ -647,11 +660,13 @@ bif_regexp_parse_impl (caddr_t * qst, ca
   int utf8_mode, utf8_mode2;
   char *pattern = NULL;
   char *str = NULL;
-  int offset, str_len;
+  int offset;
+  PCRE2_SIZE str_len;
   compiled_regexp_t *cd_info = NULL;
-  int c_opts = 0, r_opts = 0, max_n_hits = 0x1000000 / sizeof (ptrlong);
+  uint32_t c_opts = 0, r_opts = 0, max_n_hits = 0x1000000 / sizeof (ptrlong);
   caddr_t p_to_free = NULL, str_to_free = NULL;
-  int offvect[NOFFSETS];
+  PCRE2_SIZE *offvect;
+  pcre2_match_data *md = NULL;
   ptrlong *ret_vec = NULL;
   dk_set_t ret_revlist = NULL;
 
@@ -679,13 +694,16 @@ bif_regexp_parse_impl (caddr_t * qst, ca
   if (NULL != err_ret[0])
     goto done;
 
-  str_len = (int) strlen (str);
+  str_len = strlen (str);
+  create_match_context ();
+  md = pcre2_match_data_create (NOFFSETS, NULL);
   if (parse_list)
     {
       while (0 < max_n_hits--)
         {
-          int result = pcre_exec (cd_info->code, cd_info->code_x, str, str_len, offset, r_opts,
-            offvect, NOFFSETS);
+          int result = pcre2_match (cd_info->code, (PCRE2_SPTR) str, str_len,
+                                    offset, r_opts, md, match_ctxt);
+          offvect = pcre2_get_ovector_pointer (md);
           if (0 >= result)
             break;
           ret_vec = regexp_offvect_to_array_of_long ((utf8char *)str, offvect, result, utf8_mode);
@@ -698,13 +716,15 @@ bif_regexp_parse_impl (caddr_t * qst, ca
     }
   else
     {
-      int result = pcre_exec (cd_info->code, cd_info->code_x, str, str_len, offset, r_opts,
-        offvect, NOFFSETS);
+      int result = pcre2_match (cd_info->code, (PCRE2_SPTR) str, str_len,
+                                offset, r_opts, md, match_ctxt);
+      offvect = pcre2_get_ovector_pointer (md);
       ret_vec = regexp_offvect_to_array_of_long ((utf8char *)str, offvect, result, utf8_mode);
     }
 
 done:
   release_compiled_regexp (compiled_regexps, cd_info);
+  pcre2_match_data_free (md);
   dk_free_tree (p_to_free);
   dk_free_tree (str_to_free);
   if (*err_ret)
@@ -989,7 +1009,10 @@ err_at_replace:
 static caddr_t
 bif_regexp_version (caddr_t * qst, caddr_t * err_ret, state_slot_t ** args)
 {
-  return box_dv_short_string(pcre_version());
+  char ver[24];
+
+  pcre2_config (PCRE2_CONFIG_VERSION, ver);
+  return box_dv_short_string(ver);
 }
 
 /*
@@ -1199,10 +1222,10 @@ regexp_match_iri_const (int what, const
 {
   static compiled_regexp_t *cd_iri, *cd_abs, *cd_ref;
   compiled_regexp_t *cd_info = NULL;
-  int r_opts = 0, c_opts = PCRE_UTF8 | PCRE_CASELESS;
-  int offvect[NOFFSETS];
+  uint32_t r_opts = 0, c_opts = PCRE2_UTF | PCRE2_CASELESS;
+  pcre2_match_data *md;
   int result;
-  int str_len = (int) strlen (str);
+  PCRE2_SIZE str_len = strlen (str);
   caddr_t pattern;
 
   cd_info = (RX_IRI_REF == what ? cd_ref : (RX_IRI == what ? cd_iri : cd_abs));
@@ -1226,8 +1249,11 @@ regexp_match_iri_const (int what, const
               break;
         }
     }
-  memset (offvect, -1, NOFFSETS * sizeof (int));
-  result = pcre_exec (cd_info->code, cd_info->code_x, str, str_len, 0, r_opts, offvect, NOFFSETS);
+  create_match_context ();
+  md = pcre2_match_data_create (NOFFSETS, NULL);
+  result = pcre2_match (cd_info->code, (PCRE2_SPTR) str, str_len, 0, r_opts,
+                        md, match_ctxt);
+  pcre2_match_data_free (md);
   return (result != -1 ? 1 : 0);
 }
 
@@ -1268,21 +1294,26 @@ bif_regexp_init ()
 
 /* internal functions for internal usage in Virtuoso */
 caddr_t
-regexp_match_01 (const char* pattern, const char* str, int c_opts)
+regexp_match_01 (const char* pattern, const char* str, uint32_t c_opts)
 {
   compiled_regexp_t *cd_info = NULL;
   int r_opts = 0;
   caddr_t err = NULL;
-  int offvect[NOFFSETS];
+  PCRE2_SIZE *offvect;
+  pcre2_match_data *md;
   int result;
-  int str_len = (int) strlen (str);
+  size_t str_len = strlen (str);
 
   cd_info = get_compiled_regexp (compiled_regexps, pattern, c_opts, &err);
   if (err)
     sqlr_resignal (err);
 
-  memset (offvect, -1, NOFFSETS * sizeof (int));
-  result = pcre_exec (cd_info->code, cd_info->code_x, str, str_len, 0, r_opts, offvect, NOFFSETS);
+  create_match_context ();
+  md = pcre2_match_data_create (NOFFSETS, NULL);
+  result = pcre2_match (cd_info->code, (PCRE2_SPTR) str, str_len, 0, r_opts,
+                        md, match_ctxt);
+  offvect = pcre2_get_ovector_pointer (md);
+  pcre2_match_data_free (md);
   release_compiled_regexp (compiled_regexps, cd_info);
   if (result != -1)
     {
@@ -1296,14 +1327,15 @@ regexp_match_01 (const char* pattern, co
 
 
 caddr_t
-regexp_match_01_const (const char* pattern, const char* str, int c_opts, void** ret)
+regexp_match_01_const (const char* pattern, const char* str, uint32_t c_opts, void** ret)
 {
   compiled_regexp_t *cd_info = ((compiled_regexp_t **)ret)[0];
-  int r_opts = 0;
+  uint32_t r_opts = 0;
   caddr_t err = NULL;
-  int offvect[NOFFSETS];
+  PCRE2_SIZE *offvect;
+  pcre2_match_data *md;
   int result;
-  int str_len = (int) strlen (str);
+  PCRE2_SIZE str_len = strlen (str);
   if (NULL == cd_info)
     {
       cd_info = get_compiled_regexp (compiled_regexps, pattern, c_opts, &err);
@@ -1311,8 +1343,12 @@ regexp_match_01_const (const char* patte
         sqlr_resignal (err);
       ret[0] = cd_info;
     }
-  memset (offvect, -1, NOFFSETS * sizeof (int));
-  result = pcre_exec (cd_info->code, cd_info->code_x, str, str_len, 0, r_opts, offvect, NOFFSETS);
+  create_match_context ();
+  md = pcre2_match_data_create (NOFFSETS, NULL);
+  result = pcre2_match (cd_info->code, (PCRE2_SPTR) str, str_len, 0, r_opts,
+                        md, match_ctxt);
+  offvect = pcre2_get_ovector_pointer (md);
+  pcre2_match_data_free (md);
   if (result != -1)
     return box_dv_short_nchars (str + offvect[0], offvect[1] - offvect[0]);
   return NULL;
@@ -1323,26 +1359,26 @@ struct regexp_opts_s {
   char	mc;
   int	opt;
 } regexp_mode_table[] = {
-  { 'i',	PCRE_CASELESS },
-  { 'm',	PCRE_MULTILINE },
-  { 's',	PCRE_DOTALL },
-  { 'x',	PCRE_EXTENDED }
+  { 'i',	PCRE2_CASELESS },
+  { 'm',	PCRE2_MULTILINE },
+  { 's',	PCRE2_DOTALL },
+  { 'x',	PCRE2_EXTENDED }
 };
 
 #define regexp_mode_table_l (sizeof(regexp_mode_table)/sizeof(struct regexp_opts_s))
 
 
 
-int
+uint32_t
 regexp_make_opts (const char* mode)
 {
   const char* mode_char = mode;
-  int c_opts = 0;
+  uint32_t c_opts = 0;
   if (!mode)
     return 0;
   while (mode_char[0])
     {
-      int i;
+      size_t i;
       for (i=0;i<regexp_mode_table_l;i++)
 	{
 	  if (regexp_mode_table[i].mc == mode_char[0])
@@ -1372,18 +1408,24 @@ regexp_make_opts (const char* mode)
 */
 
 int
-regexp_split_parse (const char* pattern, const char* str, int* offvect, int offvect_sz, int c_opts)
+regexp_split_parse (const char* pattern, const char* str, PCRE2_SIZE* offvect, uint32_t offvect_sz, uint32_t c_opts)
 {
-  int str_len;
-  int r_opts = 0;
+  PCRE2_SIZE str_len;
+  uint32_t r_opts = 0;
   int result;
   caddr_t err = NULL;
+  pcre2_match_data *md;
   compiled_regexp_t *cd_info = get_compiled_regexp (compiled_regexps, pattern, c_opts, &err);
   if (err)
     sqlr_resignal (err);
 
-  str_len = (int) strlen (str);
-  result = pcre_exec (cd_info->code, cd_info->code_x, str, str_len, 0, r_opts, offvect, offvect_sz);
+  str_len = strlen (str);
+  create_match_context ();
+  md = pcre2_match_data_create (offvect_sz, NULL);
+  result = pcre2_match (cd_info->code, (PCRE2_SPTR) str, str_len, 0, r_opts,
+                        md, match_ctxt);
+  offvect = pcre2_get_ovector_pointer (md);
+  pcre2_match_data_free (md);
   release_compiled_regexp (compiled_regexps, cd_info);
   return result;
 }
@@ -1397,11 +1439,12 @@ regexp_split_parse (const char* pattern,
 */
 
 caddr_t
-regexp_split_match (const char* pattern, const char* str, int* next, int c_opts)
+regexp_split_match (const char* pattern, const char* str, int* next, uint32_t c_opts)
 {
-  int str_len;
-  int r_opts = 0;
-  int offvect[NOFFSETS];
+  PCRE2_SIZE str_len;
+  uint32_t r_opts = 0;
+  PCRE2_SIZE *offvect;
+  pcre2_match_data *md;
   int result;
   caddr_t ret_str;
   caddr_t err = NULL;
@@ -1409,15 +1452,19 @@ regexp_split_match (const char* pattern,
   if (err)
     sqlr_resignal (err);
 
-  str_len = (int) strlen (str);
-  result = pcre_exec (cd_info->code, cd_info->code_x, str, str_len, 0, r_opts, offvect, NOFFSETS);
+  str_len = strlen (str);
+  create_match_context ();
+  md = pcre2_match_data_create (NOFFSETS, NULL);
+  result = pcre2_match (cd_info->code, (PCRE2_SPTR) str, str_len, 0, r_opts,
+                        md, match_ctxt);
+  offvect = pcre2_get_ovector_pointer (md);
   if (result != -1)
     {
       ret_str = dk_alloc_box (offvect[0] + 1, DV_STRING);
       strncpy (ret_str, str, offvect[0]);
       ret_str[offvect[0]] = 0;
       if (next)
-        next[0] = offvect[1];
+        next[0] = (int) offvect[1];
     }
   else
     {
@@ -1426,5 +1473,6 @@ regexp_split_match (const char* pattern,
         next[0] = -1;
     }
   release_compiled_regexp (compiled_regexps, cd_info);
+  pcre2_match_data_free (md);
   return ret_str;
 }
--- a/libsrc/Wi/srvstat.c
+++ b/libsrc/Wi/srvstat.c
@@ -419,8 +419,8 @@ extern int dbf_log_fsync;
 extern int dbf_assert_on_malformed_data;
 extern int dbf_max_itc_samples;
 
-extern int32 c_pcre_match_limit;
-extern int32 c_pcre_match_limit_recursion;
+extern uint32 c_pcre_match_limit;
+extern uint32 c_pcre_match_limit_recursion;
 extern int32 pcre_max_cache_sz;
 extern int64 users_cache_sz;
 extern int32 enable_cpt_rb_ck;
--- a/libsrc/Wi/sqlbif.h
+++ b/libsrc/Wi/sqlbif.h
@@ -324,12 +324,12 @@ extern caddr_t file_native_name (caddr_t
 extern caddr_t file_native_name_from_iri_path_nchars (const char *iri_path, size_t iri_path_len);
 caddr_t get_ssl_error_text (char *buf, int len);
 
-caddr_t regexp_match_01 (const char *pattern, const char *str, int c_opts);
-caddr_t regexp_match_01_const (const char* pattern, const char* str, int c_opts, void ** compiled_ret);
+caddr_t regexp_match_01 (const char *pattern, const char *str, uint32 c_opts);
+caddr_t regexp_match_01_const (const char* pattern, const char* str, uint32_t c_opts, void ** compiled_ret);
 int regexp_match_iri_const (int what, const char* str, caddr_t *err_ret);
-caddr_t regexp_split_match (const char* pattern, const char* str, int* next, int c_opts);
-int regexp_make_opts (const char* mode);
-int regexp_split_parse (const char* pattern, const char* str, int* offvect, int offvect_sz, int c_opts);
+caddr_t regexp_split_match (const char* pattern, const char* str, int* next, uint32_t c_opts);
+uint32_t regexp_make_opts (const char* mode);
+int regexp_split_parse (const char* pattern, const char* str, size_t* offvect, uint32_t offvect_sz, uint32_t c_opts);
 
 /*! Wrapper for uu_decode_part,
  modifies \c src input string! */
--- a/libsrc/Wi/xqf.c
+++ b/libsrc/Wi/xqf.c
@@ -41,7 +41,8 @@
 
 // Debian maintainer: replaced by external PCRE
 // #include "util/pcrelib/pcre.h"
-#include "pcre.h"
+#define PCRE2_CODE_UNIT_WIDTH 8
+#include <pcre2.h>
 
 #define ecm_isname(c) \
   ( ((c) & ~0xFF) ? (ecm_utf8props[(c)] & ECM_ISNAME) : \
@@ -1558,7 +1559,7 @@ xqf_starts_with  (xp_instance_t * xqi, X
 
 
 static void
-xqf_check_regexp (caddr_t pattern, int c_opts)
+xqf_check_regexp (caddr_t pattern, uint32_t c_opts)
 {
   caddr_t pre_res;
   int next;
@@ -1570,13 +1571,13 @@ xqf_check_regexp (caddr_t pattern, int c
     sqlr_new_error ("42001", "XRQ??", "invalid regular expression");
 }
 
-static int
+static uint32_t
 xqf_make_regexp_modes(const char * flag)
 {
-  int c_opts;
+  uint32_t c_opts;
   if ((c_opts=regexp_make_opts (flag)) == -1)
     sqlr_new_error ("42001", "XRQ??", "invalid regular expression flag");
-  c_opts |= PCRE_UTF8;
+  c_opts |= PCRE2_UTF;
   return c_opts;
 }
 
@@ -1588,7 +1589,7 @@ __xqf_tokenize (caddr_t str, caddr_t pat
   int next = 1;
   caddr_t str_inx = str;
   dk_set_t res_set = 0;
-  int c_opts;
+  uint32_t c_opts;
 
   c_opts=xqf_make_regexp_modes (flag);
   xqf_check_regexp (pattern, c_opts);
@@ -1646,7 +1647,7 @@ static void
 xqf_matches (xp_instance_t * xqi, XT * tree, xml_entity_t * ctx_xe)
 {
   caddr_t val1, val2, val3 = NULL;
-  int c_opts;
+  uint32_t c_opts;
 
   if (tree->_.xp_func.argcount)
     {
@@ -2021,7 +2022,7 @@ xqf_ends_with  (xp_instance_t * xqi, XT
 #define XQF_REPL_OK	0
 
 static int
-xqf_write_replacement (dk_session_t * ses, caddr_t input, int * offvect, int offvect_sz, caddr_t replacement)
+xqf_write_replacement (dk_session_t * ses, caddr_t input, size_t * offvect, int offvect_sz, caddr_t replacement)
 {
   int repl_sz = box_length (replacement) - 1;
   int idx = 0;
@@ -2060,7 +2061,7 @@ xqf_replace  (xp_instance_t * xqi, XT *
   caddr_t pattern = xpf_arg (xqi, tree, ctx_xe, DV_STRING, 1);
   caddr_t replacement = xpf_arg (xqi, tree, ctx_xe, DV_STRING, 2);
   caddr_t flag = 0;
-  int c_opts;
+  uint32_t c_opts;
   if (tree->_.xp_func.argcount > 3)
     flag = xpf_arg (xqi, tree, ctx_xe, DV_STRING, 3);
 
@@ -2068,7 +2069,7 @@ xqf_replace  (xp_instance_t * xqi, XT *
   xqf_check_regexp (pattern, c_opts);
 
   {
-    int offvect[128];
+    size_t offvect[128];
     int res = regexp_split_parse (pattern, input, offvect, 128, c_opts);
     int utf8_str_len = box_length (input) - 1;
     if (res != -1)
--- a/libsrc/Xml.new/datatypes.c
+++ b/libsrc/Xml.new/datatypes.c
@@ -25,7 +25,8 @@
 
 #include "xmlparser_impl.h"
 #include "schema.h"
-#include "pcre.h"
+#define PCRE2_CODE_UNIT_WIDTH 8
+#include <pcre2.h>
 
 ptrlong
 xs_get_primitive_typeidx (vxml_parser_t * parser, xs_component_t *type)
@@ -48,7 +49,7 @@ xs_get_primitive_typeidx (vxml_parser_t
   return (ptrlong)base_type;
 }
 
-extern caddr_t regexp_match_01 (const char* pattern, const char* str, int c_opts);
+extern caddr_t regexp_match_01 (const char* pattern, const char* str, uint32_t c_opts);
 
 int
 xs_check_type_compliance (vxml_parser_t * parser, xs_component_t *type,
@@ -69,7 +70,7 @@ xs_check_type_compliance (vxml_parser_t
   if (NULL != basetype_regexp)
     {
       int match_len = 1;
-      caddr_t match = regexp_match_01 (basetype_regexp, (const char *) value, PCRE_UTF8);
+      caddr_t match = regexp_match_01 (basetype_regexp, (const char *) value, PCRE2_UTF);
       if (match)
 	{
 	  match_len = box_length (match);
--- a/binsrc/virtuoso/Makefile.am
+++ b/binsrc/virtuoso/Makefile.am
@@ -39,7 +39,7 @@ else
     VIRTUOSO_BIN=virtuoso-t
 endif
 
-LIBS		+= @ZLIB_LIB@ -lminizip -lpcre
+LIBS		+= @ZLIB_LIB@ -lminizip -lpcre2-8
 
 lib_LTLIBRARIES = libvirtuoso-t.la $(IODBC_LIBS) $(MONO_LIBS) $(IODBC_MONO_LIBS)
 bin_PROGRAMS = virtuoso-t $(IODBC_PROGS)
--- a/binsrc/tests/Makefile.am
+++ b/binsrc/tests/Makefile.am
@@ -52,7 +52,7 @@ server_libs = \
 	$(top_builddir)/libsrc/Tidy/libtidy.la \
 	$(top_builddir)/libsrc/util/libutil.la \
 	@srvrlibs@ \
-	-lpcre -lminizip
+	-lpcre2-8 -lminizip
 
 client_libs = \
 	$(top_builddir)/libsrc/Wi/libwic.la \
@@ -60,7 +60,7 @@ client_libs = \
 	$(top_builddir)/libsrc/Thread/libthrs.la \
 	$(top_builddir)/libsrc/util/libutil.la \
 	@clntlibs@ \
-	-lpcre -lminizip
+	-lpcre2-8 -lminizip
 
 M2_SOURCES = chil.c
 M2_LDADD = \
