Description: switch from pcre3 (pcrecpp) to pcre2
 This was tested by successfully running pcreTest (in
 cpp/src/GsnapParseTest1.cc). The other modified files were not compiled.
Author: Pierre Gruet <pgt@debian.org>
Bug-Debian: https://bugs.debian.org/1000043
Forwarded: https://github.com/CampagneLaboratory/goby3/issues/9
Last-Update: 2021-11-26

--- a/cpp/src/GsnapParseTest1.cc
+++ b/cpp/src/GsnapParseTest1.cc
@@ -20,6 +20,7 @@
 #if	HAVE_CONFIG_H
 #include <config.h>
 #endif
+#include <string>
 
 #include <sys/types.h>
 #include <sys/stat.h>
@@ -29,11 +30,13 @@
 #else
 #include <unistd.h>
 #endif
+#include <iostream>
 
 #include "goby/C_Gsnap.h"
 #include "goby/C_Alignments.h"
 #include "goby/C_Reads.h"
-#include <pcrecpp.h>
+#define PCRE2_CODE_UNIT_WIDTH 8
+#include <pcre2.h>
 
 using namespace	std;
 
@@ -178,21 +181,53 @@
 }
 
 void pcreTest()	{
-	char *a	= "s-reads:3.2..b:5.1,l_reads:3,c:3,d:5,a:0..b:2.8";
+	string a = string("s-reads:3.2..b:5.1,l_reads:3,c:3,d:5,a:0..b:2.8");
 	string temp1;
 	string da;	   string daClip;	string daProb;
 	string temp2;
 	string std;	   string stdClip;
 
-	pcrecpp::StringPiece input(a);	// Wrap	in a StringPiece
-	pcrecpp::RE	re("(([A-Za-z_\\-]+):(\\d+)\\.(\\d))|(([A-Za-z_\\-]+):(\\d))");
+        //Setting up pcre2.
+        string sPattern = string("(([A-Za-z_\\-]+):(\\d+)\\.(\\d))|(([A-Za-z_\\-]+):(\\d))");
+        PCRE2_SPTR pattern = (PCRE2_SPTR)(sPattern.c_str());
+        PCRE2_SPTR subject = (PCRE2_SPTR)(a.c_str());
+        //The length given to pcre2_match() is that of the subject, not of the
+        //pattern; c_str() is zero-terminated, so pcre2 can measure it itself.
+        PCRE2_SIZE subject_length = PCRE2_ZERO_TERMINATED;
+        int errornumber;
+        int rc;
+        PCRE2_SIZE erroroffset;
+        PCRE2_SIZE *ovector;
+        pcre2_code *re = pcre2_compile(pattern, PCRE2_ZERO_TERMINATED, 0, &errornumber, &erroroffset, NULL);
+        pcre2_match_data *match_data = pcre2_match_data_create_from_pattern(re, NULL);
+        PCRE2_SIZE start_offset = 0;
 
 	printf("pcre start\n");
-	while (re.FindAndConsume(&input,
-			&temp1,
-			&da, &daClip, &daProb,
-			&temp2,
-			&std, &stdClip)) {
+        do {
+            rc = pcre2_match(re, subject, subject_length, start_offset, 0, match_data, NULL);
+            if (rc < 0) {
+                //No further match (or a matching error): release the pcre2
+                //objects and leave, so that the code below never runs on the
+                //captures of the previous match.
+                pcre2_match_data_free(match_data);
+                pcre2_code_free(re);
+                break;
+            }
+            //Match succeeded. Copying the seven capture groups into the
+            //output strings. A group that took no part in the match has its
+            //offsets set to PCRE2_UNSET and must yield an empty string.
+            string* captures[7] = { &temp1, &da, &daClip, &daProb, &temp2, &std, &stdClip };
+            ovector = pcre2_get_ovector_pointer(match_data);
+            for (int i = 0 ; i < 7 ; i++) {
+                PCRE2_SIZE first = ovector[2 * i + 2];
+                PCRE2_SIZE last = ovector[2 * i + 3];
+                if (first == PCRE2_UNSET)
+                    captures[i]->clear();
+                else
+                    captures[i]->assign((const char*)(subject + first), last - first);
+            }
+            //Preparing next match.
+            start_offset = ovector[1];
 
 		if (da.length()	> 0) {
 			cout <<	"da=" << da	<< " ";
@@ -203,7 +238,7 @@
 			cout <<	"stdClip=" << stdClip << endl;
 		}
 
-	}
+	} while (rc != PCRE2_ERROR_NOMATCH);
 	printf("done\n");
 }
 
--- a/cpp/src/goby/C_Alignments.cc
+++ b/cpp/src/goby/C_Alignments.cc
@@ -18,7 +18,8 @@
 
 #include <string>
 #include <iostream>
-#include <pcrecpp.h>
+#define PCRE2_CODE_UNIT_WIDTH 8
+#include <pcre2.h>
 #include <stdio.h>
 
 #include "Reads.h"
@@ -872,8 +873,21 @@
     }
 
     void applyCigar(CSamHelper *samHelper) {
-        pcrecpp::RE re("([0-9]+)([SMID])");
-        pcrecpp::StringPiece input(samHelper->cpp_cigar->c_str());
+        //Setting up pcre2.
+        string sPattern = string("([0-9]+)([SMID])");
+        PCRE2_SPTR pattern = (PCRE2_SPTR)(sPattern.c_str());
+        PCRE2_SPTR subject = (PCRE2_SPTR)(samHelper->cpp_cigar->c_str());
+        //The length given to pcre2_match() is that of the subject, not of the
+        //pattern; c_str() is zero-terminated, so pcre2 can measure it itself.
+        PCRE2_SIZE subject_length = PCRE2_ZERO_TERMINATED;
+        int errornumber;
+        int rc;
+        PCRE2_SIZE erroroffset;
+        PCRE2_SIZE *ovector;
+        pcre2_code *re = pcre2_compile(pattern, PCRE2_ZERO_TERMINATED, 0, &errornumber, &erroroffset, NULL);
+        pcre2_match_data *match_data = pcre2_match_data_create_from_pattern(re, NULL);
+        PCRE2_SIZE start_offset = 0;
+
         debug (fprintf(stderr, ":: Applying cigar=%s\n", samHelper->cpp_cigar->c_str());)
         int length;
         char op;
@@ -883,7 +897,25 @@
         samHelper->numInsertions = 0;
         samHelper->numDeletions = 0;
         samHelper->numMisMatches = 0;
-        while (re.Consume(&input, &length, &op)) {
+
+        do {
+            //PCRE2_ANCHORED: like pcrecpp's Consume(), only match at the
+            //current position instead of searching further on.
+            rc = pcre2_match(re, subject, subject_length, start_offset, PCRE2_ANCHORED, match_data, NULL);
+            if (rc < 0) {
+                //No further match (or a matching error): release the pcre2
+                //objects and leave without re-applying the previous operation.
+                pcre2_match_data_free(match_data);
+                pcre2_code_free(re);
+                break;
+            }
+            //Match succeeded. Converting the captures to the needed types.
+            ovector = pcre2_get_ovector_pointer(match_data);
+            length = stoi(string((const char*)(subject + ovector[2]), ovector[3] - ovector[2]));
+            op = (char)(subject[ovector[4]]);
+            //Preparing next match.
+            start_offset = ovector[1];
+
             switch(op) {
                 case 'S':
                     // Soft clipping
@@ -933,20 +965,48 @@
                     break;
             }
             startOfCigar = false;
-        }
+        } while (rc != PCRE2_ERROR_NOMATCH);
     }
 
     void applyMd(CSamHelper *samHelper) {
         // My RE is simplified from the SAM spec but performs the same task
         // the major difference being mine would allow 5ACG where the current
         // spec would require 5A0C0G0 (which mine will still work with just fine).
-        pcrecpp::RE re("([0-9]+|[ACGTN]|\\^[ACGTN]+)");
-        pcrecpp::StringPiece input(samHelper->cpp_md->c_str());
+
+        //Setting up pcre2.
+        string sPattern = string("([0-9]+|[ACGTN]|\\^[ACGTN]+)");
+        PCRE2_SPTR pattern = (PCRE2_SPTR)(sPattern.c_str());
+        PCRE2_SPTR subject = (PCRE2_SPTR)(samHelper->cpp_md->c_str());
+        //The length given to pcre2_match() is that of the subject, not of the
+        //pattern; c_str() is zero-terminated, so pcre2 can measure it itself.
+        PCRE2_SIZE subject_length = PCRE2_ZERO_TERMINATED;
+        int errornumber;
+        int rc;
+        PCRE2_SIZE erroroffset;
+        PCRE2_SIZE *ovector;
+        pcre2_code *re = pcre2_compile(pattern, PCRE2_ZERO_TERMINATED, 0, &errornumber, &erroroffset, NULL);
+        pcre2_match_data *match_data = pcre2_match_data_create_from_pattern(re, NULL);
+        PCRE2_SIZE start_offset = 0;
+
         debug (fprintf(stderr, ":: Applying md=%s\n", samHelper->cpp_md->c_str());)
         string mdPart;
         int position = samHelper->numLeftClipped;
         int i;
-        while (re.Consume(&input, &mdPart)) {
+
+        do {
+            //PCRE2_ANCHORED mimics pcrecpp's Consume(): match only at start_offset.
+            rc = pcre2_match(re, subject, subject_length, start_offset, PCRE2_ANCHORED, match_data, NULL);
+            if (rc < 0) {
+                //No further match (or a matching error): free the pcre2 objects
+                //and stop instead of processing the previous mdPart once more.
+                pcre2_match_data_free(match_data);
+                pcre2_code_free(re);
+                break;
+            }
+            ovector = pcre2_get_ovector_pointer(match_data);
+            mdPart.assign((const char*)(subject + ovector[2]), ovector[3] - ovector[2]);
+            start_offset = ovector[1]; //Preparing next match.
+
             if (isdigit(mdPart[0])) {
                 int length = atoi(mdPart.c_str());
                 position += length;
@@ -962,7 +1022,7 @@
                     samHelper->numMisMatches++;
                 }
             }
-        }
+        } while (rc != PCRE2_ERROR_NOMATCH);
     }
 
     void samHelper_constructRefAndQuery(CSamHelper *samHelper) {
--- a/cpp/src/goby/C_Gsnap.cc
+++ b/cpp/src/goby/C_Gsnap.cc
@@ -27,7 +27,8 @@
 #include "C_Gsnap_structs.h"
 #include "C_Alignments.h"
 #include "hash.h"
-#include "pcrecpp.h"
+#define PCRE2_CODE_UNIT_WIDTH 8
+#include <pcre2.h>
 
 /*
  * TODO: Make sure the fields I moved from segment to alignment really
@@ -44,10 +45,6 @@
 
 using namespace std;
 
-using pcrecpp::StringPiece;
-using pcrecpp::RE;
-using pcrecpp::RE_Options;
-
 #undef C_GSNAP_DEBUG
 #ifdef C_GSNAP_DEBUG
 #define debug(x) x
@@ -359,13 +356,47 @@
             string temp2;
             string std;    string stdClip;
 
-            pcrecpp::StringPiece input(a);  // Wrap in a StringPiece
-            pcrecpp::RE re("(([A-Za-z_\\-]+):(\\d+)\\.(\\d+))|(([A-Za-z_\\-]+):(\\d+))");
+            //Setting up pcre2.
+            string sPattern = string("(([A-Za-z_\\-]+):(\\d+)\\.(\\d+))|(([A-Za-z_\\-]+):(\\d+))");
+            PCRE2_SPTR pattern = (PCRE2_SPTR)(sPattern.c_str());
+            PCRE2_SPTR subject = (PCRE2_SPTR)(a);
+            //The length given to pcre2_match() is that of the subject, not of
+            //the pattern. NOTE: assumes a is a zero-terminated C string - confirm.
+            PCRE2_SIZE subject_length = PCRE2_ZERO_TERMINATED;
+            int errornumber;
+            int rc;
+            PCRE2_SIZE erroroffset;
+            PCRE2_SIZE *ovector;
+            pcre2_code *re = pcre2_compile(pattern, PCRE2_ZERO_TERMINATED, 0, &errornumber, &erroroffset, NULL);
+            pcre2_match_data *match_data = pcre2_match_data_create_from_pattern(re, NULL);
+            PCRE2_SIZE start_offset = 0;
+
+            do {
+                rc = pcre2_match(re, subject, subject_length, start_offset, 0, match_data, NULL);
+                if (rc < 0) {
+                    //No further match (or a matching error): release the pcre2
+                    //objects and leave, so that the code below never runs on
+                    //the captures of the previous match.
+                    pcre2_match_data_free(match_data);
+                    pcre2_code_free(re);
+                    break;
+                }
+                //Match succeeded. Copying the seven capture groups into the
+                //output strings. A group that took no part in the match has
+                //its offsets set to PCRE2_UNSET and must yield an empty string.
+                string* captures[7] = { &temp1, &da, &daClip, &daProb, &temp2, &std, &stdClip };
+                ovector = pcre2_get_ovector_pointer(match_data);
+                for (int i = 0 ; i < 7 ; i++) {
+                    PCRE2_SIZE first = ovector[2 * i + 2];
+                    PCRE2_SIZE last = ovector[2 * i + 3];
+                    if (first == PCRE2_UNSET)
+                        captures[i]->clear();
+                    else
+                        captures[i]->assign((const char*)(subject + first), last - first);
+                }
+                //Preparing next match.
+                start_offset = ovector[1];
 
-            while (re.FindAndConsume(&input, &temp1,
-                                     &da, &daClip, &daProb,
-                                     &temp2,
-                                     &std, &stdClip)) {
                 if (da.length() > 0) {
                     if (da.length() > 12 && da.substr(0, 12) == "splice_dist_") {
                         // Remove the _1, _2 suffix
@@ -382,7 +413,7 @@
                     keys.push_back(std);
                     result[std] = stdClip;
                 }
-            }
+            } while (rc != PCRE2_ERROR_NOMATCH);
         }
         return pair<vector<string>,
                     LIBGOBY_HASH_MAP<string, string> >(keys, result);
--- a/cpp/configure.ac
+++ b/cpp/configure.ac
@@ -63,9 +63,9 @@
 # use pkg-config to make sure that protobuf is available
 # see http://code.google.com/p/protobuf/
 PKG_CHECK_MODULES([PROTOBUF], [protobuf >= 2.4.1]) 
-# use pkg-config to make sure that PCRE is available
+# use pkg-config to make sure that PCRE2 is available
 # see http://www.pcre.org/
-PKG_CHECK_MODULES([LIBPCRECPP], [libpcrecpp >= 8.10]) 
+PKG_CHECK_MODULES([LIBPCRE2], [libpcre2-8 >= 10.39])
 
 AC_CONFIG_FILES([Makefile src/Makefile goby.pc])
 AC_OUTPUT
--- a/cpp/goby.pc.in
+++ b/cpp/goby.pc.in
@@ -6,5 +6,5 @@
 Name: Goby
 Description: API for reading binary data files created using the Goby next-gen data management framework
 Version: @VERSION@
-Libs: -L${libdir} -lgoby @PROTOBUF_LIBS@ @LIBPCRECPP_LIBS@
-Cflags: -I${includedir} @PROTOBUF_CFLAGS@ @LIBPCRECPP_CFLAGS@
+Libs: -L${libdir} -lgoby @PROTOBUF_LIBS@ @LIBPCRE2_LIBS@
+Cflags: -I${includedir} @PROTOBUF_CFLAGS@ @LIBPCRE2_CFLAGS@
--- a/cpp/src/Makefile.am
+++ b/cpp/src/Makefile.am
@@ -22,10 +22,10 @@
 
 BUILT_SOURCES = goby.timestamp
 
-AM_CXXFLAGS = @PROTOBUF_CFLAGS@ @LIBPCRECPP_CFLAGS@ ${BOOST_CPPFLAGS}
-AM_CFLAGS = @PROTOBUF_CFLAGS@ @LIBPCRECPP_CFLAGS@ ${BOOST_CPPFLAGS}
+AM_CXXFLAGS = @PROTOBUF_CFLAGS@ @LIBPCRE2_CFLAGS@ ${BOOST_CPPFLAGS}
+AM_CFLAGS = @PROTOBUF_CFLAGS@ @LIBPCRE2_CFLAGS@ ${BOOST_CPPFLAGS}
 
-libgoby_la_LIBADD = @PROTOBUF_LIBS@ @LIBPCRECPP_LIBS@
+libgoby_la_LIBADD = @PROTOBUF_LIBS@ @LIBPCRE2_LIBS@
 libgoby_la_LDFLAGS = -version-info @version_info@ -export-dynamic # -no-undefined
 libgoby_la_SOURCES = \
 	goby/ICBFileInputStream.cc \
