Description: Port from pcre3 to pcre2
Bug-Debian: https://bugs.debian.org/1000000
Author: Andreas Tille <tille@debian.org>
Last-Update: 2021-11-19
Forwarded: https://github.com/CshlSiepelLab/phast/issues/49
--- a/include/phast/stringsplus.h
+++ b/include/phast/stringsplus.h
@@ -24,7 +24,8 @@
#ifndef STRINGSPLUS_H
#define STRINGSPLUS_H
-#include <pcre.h>
+#define PCRE2_CODE_UNIT_WIDTH 8
+#include <pcre2.h>
#include "phast/lists.h"
#include "stdio.h"
@@ -56,9 +57,9 @@ typedef struct {
int nchars; /**< Number of bytes currently allocated */
} String;
-/** PCRE is another name for Regex */
-typedef pcre Regex;
-
+/** Regex is another name for a compiled PCRE2 pattern (pcre2_code) */
+typedef pcre2_code Regex;
+
/** \name String Allocate/Cleanup functions
\{ */
@@ -99,7 +100,7 @@ void str_clear(String *s);
/** \} */
-/* void str_match(String *s, Regexp *r); */ /* tags? */
+/* void str_match(String *s, pcre2_code *r); */ /* tags? */
/** \name String Append functions */
@@ -385,13 +386,13 @@ int str_split(String *s, const char* del
expression syntax.
@result Newly allocated and compiled Regex object.
*/
-Regex *str_re_new(const char *re_str);
+pcre2_code *str_re_new(const char *re_str);
/** Free resources associated with regular expression object.
@param re Regex object to free
@note The object itself is freed also.
*/
-void str_re_free(Regex *re);
+void str_re_free(pcre2_code *re);
/** Test whether the specified string matches the specified regex.
@pre The list 'l' must be initialized externally if non-NULL.
@@ -403,11 +404,11 @@ void str_re_free(Regex *re);
on no match, and -2 on error.
@note NULLs will be added for all non-matching groups in list 'l'
@note In the list 'l', the 0th substring corresponds to the entire regex.
- @note This function uses the pcre_exec function of the PCRE
+ @note This function uses the pcre2_match function of the PCRE2
regex package.
@warning Substrings added to List l are newly allocated and must be
freed externally. */
-int str_re_match(String *s, Regex *re, List *l, int nsubexp);
+int str_re_match(String *s, pcre2_code *re, List *l, int nsubexp);
/** Search the specified string for the first instance of the specified
regex.
@@ -415,14 +416,14 @@ int str_re_match(String *s, Regex *re, L
@param start_offset The first start_offset characters will be ignored.
@param l (Optional) If non-NULL, it will be populated with substrings corresponding
to subexpressions, as described under str_re_match.
- @note This function uses the pcre_exec function of the PCRE regex package.
+ @note This function uses the pcre2_match function of the PCRE2 regex package.
@result Index of first match, -1 if no match exists, or -2 if an
internal error occurs.
@warning Substrings added to List l are newly allocated and must be
freed externally.
@see str_re_match
*/
-int str_re_search(String *s, Regex *re, int start_offset, List *l,
+int str_re_search(String *s, pcre2_code *re, int start_offset, List *l,
int nsubexp);
/** \} */
--- a/src/lib/base/phast_stringsplus.c
+++ b/src/lib/base/phast_stringsplus.c
@@ -12,7 +12,8 @@
$Id: stringsplus.c,v 1.12 2009-02-19 23:33:48 agd27 Exp $ */
-#include <pcre.h>
+#define PCRE2_CODE_UNIT_WIDTH 8
+#include <pcre2.h>
#include "phast/stringsplus.h"
#include "phast/misc.h"
#include <stdlib.h>
@@ -462,47 +463,66 @@ int str_ends_with_charstr(String *s, con
return (strncmp(&s->chars[s->length - len], substr, len) == 0);
}
-Regex *str_re_new(const char *re_str) {
- Regex *re;
- const char *errstr;
- int erroffset;
+pcre2_code *str_re_new(const char *re_str) {
+ pcre2_code *re;
+ int errorcode;
+ PCRE2_SIZE erroffset;
+ PCRE2_UCHAR errstr[256];
- re = pcre_compile(re_str, 0, &errstr, &erroffset, NULL);
+ re = pcre2_compile((PCRE2_SPTR)re_str, PCRE2_ZERO_TERMINATED, 0,
+   &errorcode, &erroffset, NULL);
if (re == NULL) {
- die("ERROR: cannot compile regular expression '%s' (%d): %s\n",
- re_str, erroffset, errstr);
+   /* Turn the numeric PCRE2 error code into a readable message */
+   pcre2_get_error_message(errorcode, errstr, sizeof(errstr));
+   die("ERROR: cannot compile regular expression '%s' (%lu): %s\n",
+     re_str, (unsigned long)erroffset, (char *)errstr);
}
return re;
}
-//NOTE Regex are allocated by pcre; do not use sfree
-void str_re_free(Regex *re) {
+//NOTE Regex are allocated by pcre2; release with pcre2_code_free, not free or sfree
+void str_re_free(pcre2_code *re) {
if (re != NULL)
- free(re);
+   pcre2_code_free(re);
}
#define OVECCOUNT 300
-int str_re_match_sub(String *s, Regex *re, List *l, int offset, int nsubexp,
+int str_re_match_sub(String *s, pcre2_code *re, List *l, int offset, int nsubexp,
int *first_match) {
- int i, len, rc, ovector[OVECCOUNT], rv;
+ int i, len, rc, rv, npairs;
+ /* Offsets are copied out of the PCRE2 match data into this local array so
+    the match data can be freed before any return (no leak) while the rest
+    of the function keeps indexing ovector[] directly. */
+ PCRE2_SIZE ovector[2 * (OVECCOUNT/3)], *match_ovector;
+ pcre2_match_data *match_data;
String *substr;
/* WARNING: lst_clear DOES NOT free memory associated with the contents,
so must free substrings from previous calls if these are no longer being
used or there will be a memory leak! */
if (l != NULL) lst_clear(l);
- rc = pcre_exec(re, NULL, s->chars, s->length, offset, 0, ovector, OVECCOUNT);
- if (rc == PCRE_ERROR_NOMATCH) return -1;
+ match_data = pcre2_match_data_create(OVECCOUNT/3, NULL);
+ if (match_data == NULL) return -2;
+ rc = pcre2_match(re, (PCRE2_SPTR)s->chars, (PCRE2_SIZE)s->length,
+   (PCRE2_SIZE)offset, 0, match_data, NULL);
+ if (rc >= 0) {
+   /* rc == 0 means more groups matched than fit; every pair is then filled */
+   npairs = (rc == 0) ? OVECCOUNT/3 : rc;
+   match_ovector = pcre2_get_ovector_pointer(match_data);
+   for (i = 0; i < 2 * npairs; i++) ovector[i] = match_ovector[i];
+ }
+ pcre2_match_data_free(match_data);
+ if (rc == PCRE2_ERROR_NOMATCH) return -1;
if (rc < 0) return -2; //any other error
if (first_match != NULL) (*first_match) = ovector[0];
rv = ovector[1]-ovector[0];
if (rc >= 0 && l != NULL) {
if (rc == 0) {
printf("nsubexp=%i rc=%i\n", nsubexp, rc);
- fprintf(stderr, "Warning: pcre_exec only has room for %d captured substrings. May need to increase OVECCOUNT and re-compile\n", OVECCOUNT/3);
+ fprintf(stderr, "Warning: pcre2_match only has room for %d captured substrings. May need to increase OVECCOUNT and re-compile\n", OVECCOUNT/3);
rc = OVECCOUNT/3;
}
for (i = 0; i < rc && i <= nsubexp; i++) {
@@ -523,11 +543,11 @@ int str_re_match_sub(String *s, Regex *r
}
-int str_re_match(String *s, Regex *re, List *l, int nsubexp) {
+int str_re_match(String *s, pcre2_code *re, List *l, int nsubexp) {
return str_re_match_sub(s, re, l, 0, nsubexp, NULL);
}
-int str_re_search(String *s, Regex *re, int start_offset, List *l,
+int str_re_search(String *s, pcre2_code *re, int start_offset, List *l,
int nsubexp) {
int first_match_idx, rc;
rc = str_re_match_sub(s, re, l, start_offset, nsubexp, &first_match_idx);
--- a/src/make-include.mk
+++ b/src/make-include.mk
@@ -137,7 +137,7 @@ LIBS = -lphast -framework Accelerate -lc
else
ifdef CLAPACKPATH
ifneq ($(TARGETOS), Windows)
- LIBS = -lphast -llapack -ltmglib -lblas -lc -lm -lpcre $(LDFLAGS)
+ LIBS = -lphast -llapack -ltmglib -lblas -lc -lm -lpcre2-8 $(LDFLAGS)
else
- CFLAGS += -I${CLAPACKPATH}/INCLUDE -I${F2CPATH} -DPCRE_STATIC
+ CFLAGS += -I${CLAPACKPATH}/INCLUDE -I${F2CPATH} -DPCRE2_STATIC
LIBS = -lphast -lm ${CLAPACKPATH}/liblapack.a ${CLAPACKPATH}/libf2c.a ${CLAPACKPATH}/libblas.a
--- a/src/dless/dlessP.c
+++ b/src/dless/dlessP.c
@@ -196,7 +196,7 @@ void do_p_values(BDPhyloHmm *bdphmm, GFF
JumpProcess *jp;
List *types = lst_new_ptr(nnodes * 2), *type_lists = lst_new_ptr(nnodes * 2);
TreeModel *mod = bdphmm->phmm->mods[0]; /* nonconserved */
- Regex *id_re = str_re_new(".*id \"([^\"]*)\"");
+ pcre2_code *id_re = str_re_new(".*id \"([^\"]*)\"");
String *id = str_new(STR_SHORT_LEN);
List *l = lst_new_ptr(1);
--- a/src/lib/base/phast_misc.c
+++ b/src/lib/base/phast_misc.c
@@ -670,7 +670,7 @@ int draw_index(double *p, int size) {
character as well as "->" to indicate mapping. */
struct hash_table *make_name_hash(char *mapstr) {
Hashtable *retval = hsh_new(20);
- Regex *map_re = str_re_new("^[[:space:]]*([A-Za-z0-9_]+)[[:space:]]*(->|=)[[:space:]]*([A-Za-z0-9_]+)[[:space:]]*");
+ pcre2_code *map_re = str_re_new("^[[:space:]]*([A-Za-z0-9_]+)[[:space:]]*(->|=)[[:space:]]*([A-Za-z0-9_]+)[[:space:]]*");
List *mappings = lst_new_ptr(20), *names = lst_new_ptr(3);
String *s = str_new_charstr(mapstr);
int i;
--- a/src/lib/feature/phast_bed.c
+++ b/src/lib/feature/phast_bed.c
@@ -140,7 +140,7 @@ void gff_print_bed(FILE *OUTF, GFF_Set
if (lst_size(gff->features) == 0) return; /* now can assume at least one feature */
if (!use_groups) {
- Regex *tag_val_re = str_re_new("[[:alnum:]_.]+[[:space:]]+(\"[^\"]*\"|[^[:space:]]+)");
+ pcre2_code *tag_val_re = str_re_new("[[:alnum:]_.]+[[:space:]]+(\"[^\"]*\"|[^[:space:]]+)");
List *l = lst_new_ptr(2);
int ncols = 4;
--- a/src/lib/feature/phast_category_map.c
+++ b/src/lib/feature/phast_category_map.c
@@ -26,11 +26,11 @@ CategoryMap *cm_read(FILE *F) {
int cat, cat2, lineno, i, cm_read_error;
CategoryMap *cm = NULL;
CategoryRange *existing_range;
- static Regex *cat_range_re = NULL;
- static Regex *ncats_re = NULL;
- static Regex *fill_re = NULL;
- static Regex *label_re = NULL;
- static Regex *extend_re = NULL;
+ static pcre2_code *cat_range_re = NULL;
+ static pcre2_code *ncats_re = NULL;
+ static pcre2_code *fill_re = NULL;
+ static pcre2_code *label_re = NULL;
+ static pcre2_code *extend_re = NULL;
int has_dependencies = 0;
line = str_new(STR_SHORT_LEN);
--- a/src/lib/feature/phast_gff.c
+++ b/src/lib/feature/phast_gff.c
@@ -38,7 +38,7 @@ GFF_Set* gff_read_set(FILE *F) {
GFF_Feature *feat;
GFF_Set *set;
List *l, *substrs;
- static Regex *spec_comment_re = NULL;
+ static pcre2_code *spec_comment_re = NULL;
line = str_new(STR_LONG_LEN);
set = gff_new_set();
@@ -267,7 +267,7 @@ GFF_Feature *gff_new_feature_genomic_pos
int score_is_null) {
GFF_Feature *retval = NULL;
List *substrs = lst_new_ptr(4);
- static Regex *posre = NULL;
+ static pcre2_code *posre = NULL;
if (posre == NULL)
posre = str_re_new("(chr[_a-zA-Z0-9]+):([0-9]+)-([0-9]+)([-+])?");
@@ -667,7 +667,7 @@ void gff_sort_within_groups(GFF_Set *set
undefined values will be placed in a single group. */
void gff_group(GFF_Set *set, char *tag) {
char *tmpstr=smalloc((100+strlen(tag))*sizeof(char));
- Regex *tag_re;
+ pcre2_code *tag_re;
List *l = lst_new_ptr(1);
int est_no_groups = max(lst_size(set->features) / 10, 1);
Hashtable *hash = hsh_new(est_no_groups);
--- a/src/lib/motif/phast_tfbs.c
+++ b/src/lib/motif/phast_tfbs.c
@@ -137,8 +137,8 @@ List *pwm_read(const char *filename) {
List *l = lst_new_ptr(3);
List *probabilitiesStr = lst_new_ptr(4);
List *probabilitiesDbl;
- Regex *pssm_re = NULL;
- Regex *motif_name_re = NULL;
+ pcre2_code *pssm_re = NULL;
+ pcre2_code *motif_name_re = NULL;
int alphabetLength;
result = lst_new_ptr(1);
@@ -215,7 +215,7 @@ int ms_alph_has_lowercase(MS *ms) {
MS *ms_read(const char *filename, const char *alphabet) {
List *names = lst_new_ptr(10);
List *seqs = lst_new_ptr(10);
- static Regex *descrip_re = NULL;
+ static pcre2_code *descrip_re = NULL;
int i, nseqs, j, do_toupper, line_no;
String *line = str_new(STR_MED_LEN);
List *l = lst_new_ptr(2);
--- a/src/lib/msa/phast_local_alignment.c
+++ b/src/lib/msa/phast_local_alignment.c
@@ -47,7 +47,7 @@ LocalPwAlignment *la_read_lav(FILE *F, i
int line_no=0;
LocalPwAlignment *lpwa = la_new();
List *fields = lst_new_ptr(6);
- Regex *stanza_start_re = str_re_new("^([dshaxm])[[:space:]]*{");
+ pcre2_code *stanza_start_re = str_re_new("^([dshaxm])[[:space:]]*{");
AlignmentBlock *aln_block = NULL;
char stanza_type = '\0';
int i;
--- a/src/lib/msa/phast_msa.c
+++ b/src/lib/msa/phast_msa.c
@@ -253,7 +253,7 @@ MSA *msa_create_copy(MSA *msa, int suff_
MSA *msa_read_fasta(FILE *F, char *alphabet) {
List *names = lst_new_ptr(10);
List *seqs = lst_new_ptr(10);
- static Regex *descrip_re = NULL;
+ static pcre2_code *descrip_re = NULL;
int maxlen, i, nseqs, j, do_toupper, line_no;
String *line = str_new(STR_MED_LEN);
List *l = lst_new_ptr(2);
@@ -1921,7 +1921,7 @@ GFF_Set *msa_get_informative_feats(MSA *
/* read and return a single sequence from a FASTA file */
String *msa_read_seq_fasta(FILE *F) {
- static Regex *descrip_re = NULL;
+ static pcre2_code *descrip_re = NULL;
String *line = str_new(STR_MED_LEN);
String *seq = NULL;
@@ -2581,7 +2581,7 @@ msa_format_type msa_format_for_content(F
msa_format_type retval = UNKNOWN_FORMAT;
String *line = str_new(STR_MED_LEN);
List *matches = lst_new_ptr(3);
- Regex *ss_re, *phylip_re, *fasta_re, *lav_re, *maf_re;
+ pcre2_code *ss_re, *phylip_re, *fasta_re, *lav_re, *maf_re;
//using peek instead of read as we don't want to affect file/stream position
str_peek_next_line(line, F);
--- a/src/lib/msa/phast_multi_msa.c
+++ b/src/lib/msa/phast_multi_msa.c
@@ -51,9 +51,9 @@
abort if the sequence contains a character not in the alphabet. */
Multi_MSA *multimsa_new_from_files(FILE *F) {
- Regex *blocks_re = str_re_new("#[[:space:]]*BLOCKS[[:space:]]*=[[:space:]]*([0-9]+)");
- Regex *alph_re = str_re_new("#[[:space:]]*ALPHABET[[:space:]]*=[[:space:]]*([A-Z]+)");
- Regex *format_re = str_re_new("#[[:space:]]*FORMAT[[:space:]]*=[[:space:]]*([A-Z]+)");
+ pcre2_code *blocks_re = str_re_new("#[[:space:]]*BLOCKS[[:space:]]*=[[:space:]]*([0-9]+)");
+ pcre2_code *alph_re = str_re_new("#[[:space:]]*ALPHABET[[:space:]]*=[[:space:]]*([A-Z]+)");
+ pcre2_code *format_re = str_re_new("#[[:space:]]*FORMAT[[:space:]]*=[[:space:]]*([A-Z]+)");
int i, num_msa, line_no=0;
char *msa_fname;
--- a/src/lib/msa/phast_sufficient_stats.c
+++ b/src/lib/msa/phast_sufficient_stats.c
@@ -649,7 +649,7 @@ void ss_write(MSA *msa, FILE *F, int sho
/* make reading order optional? alphabet argument overrides alphabet
in file (use NULL to use version in file) */
MSA* ss_read(FILE *F, char *alphabet) {
- Regex *nseqs_re, *length_re, *tuple_size_re, *ntuples_re, *tuple_re,
+ pcre2_code *nseqs_re, *length_re, *tuple_size_re, *ntuples_re, *tuple_re,
*names_re, *alph_re, *ncats_re, *order_re, *offset_re;
String *line, *alph = NULL;
int nseqs, length, tuple_size, ntuples, i, ncats = -99, header_done = 0,
--- a/src/lib/phylo/phast_phylo_p_print.c
+++ b/src/lib/phylo/phast_phylo_p_print.c
@@ -749,7 +749,7 @@ void print_feats_generic(FILE *outfile,
String *name;
va_list ap;
double *data[ncols+1];
- Regex *tag_val_re = str_re_new("[[:alnum:]_.]+[[:space:]]+(\"[^\"]*\"|[^[:space:]]+)");
+ pcre2_code *tag_val_re = str_re_new("[[:alnum:]_.]+[[:space:]]+(\"[^\"]*\"|[^[:space:]]+)");
List *l = lst_new_ptr(2);
char **colname;
List **resultList=NULL;
--- a/src/prequel/phast_pbs_code.c
+++ b/src/prequel/phast_pbs_code.c
@@ -85,7 +85,7 @@ void pbs_free(PbsCode *code) {
}
PbsCode *pbs_new_from_file(FILE *F) {
- Regex *nrows_re = str_re_new("##NROWS[[:space:]]*=[[:space:]]*([0-9]+)"),
+ pcre2_code *nrows_re = str_re_new("##NROWS[[:space:]]*=[[:space:]]*([0-9]+)"),
*dimension_re = str_re_new("##DIMENSION[[:space:]]*=[[:space:]]*([0-9]+)"),
*nbytes_re = str_re_new("##NBYTES[[:space:]]*=[[:space:]]*([0-9]+)"),
*codesize_re = str_re_new("##CODESIZE[[:space:]]*=[[:space:]]*([0-9]+)");
--- a/src/util/msa_view.c
+++ b/src/util/msa_view.c
@@ -358,7 +358,7 @@ OPTIONS:\n\
void fill_with_Ns(MSA *msa, List *fill_N_list, msa_coord_map *map) {
int i, j, nseq, nstart, nend;
- Regex* fill_N_re = str_re_new("([[:digit:]]+):([[:digit:]]+)-([[:digit:]]+)");
+ pcre2_code* fill_N_re = str_re_new("([[:digit:]]+):([[:digit:]]+)-([[:digit:]]+)");
List *word_list = lst_new_ptr(4);
for (i = 0; i < lst_size(fill_N_list); i++) {
String *s = lst_get_ptr(fill_N_list, i);
|