1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156
|
// Copyright 2007-2009 Russ Cox. All Rights Reserved.
// Copyright 2014 Paul Sokolovsky.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
#ifndef _RE1_5_REGEXP__H
#define _RE1_5_REGEXP__H
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdarg.h>
#include <assert.h>
// Null pointer constant, spelled `nil`.
#define nil ((void *)0)
// Number of elements in the array x: its total size divided by the size of
// one element. Only meaningful when x is a real array, not a pointer.
#define nelem(x) (sizeof(x) / sizeof(*(x)))
// Short names for the struct types declared in this header, so they can be
// written without the `struct` keyword (and used before their definitions).
typedef struct Regexp Regexp;
typedef struct Prog Prog;
typedef struct ByteProg ByteProg;
typedef struct Inst Inst;
typedef struct Subject Subject;
// One regular-expression node. Nodes can reference up to two other Regexp
// nodes through left and right.
struct Regexp
{
// One of the Regexp.type enum constants (Alt ... Plus).
int type;
// NOTE(review): meaning not visible in this header -- presumably a group
// number for Paren nodes; confirm against the parser.
int n;
// NOTE(review): presumably the literal character for Lit nodes -- confirm.
int ch;
// Links to other nodes. NOTE(review): presumably the operands of this node,
// with right unused by single-operand node types -- confirm.
Regexp *left;
Regexp *right;
};
// Values for Regexp.type. Numbering starts at 1 (Alt) and increases by one
// per constant, so 0 is not a valid node type.
// NOTE(review): the names suggest the usual operators -- alternation,
// concatenation, literal, '.', grouping, '?', '*', '+' -- confirm against the
// parser.
enum /* Regexp.type */
{
Alt = 1,
Cat,
Lit,
Dot,
Paren,
Quest,
Star,
Plus,
};
// Regexp tree entry points; the definitions are not in this header.
// NOTE(review): presumably parse() builds a tree from a pattern string, reg()
// creates a node of the given type with the given links, and printre() dumps
// a tree for debugging -- confirm against the implementations.
Regexp *parse(char*);
Regexp *reg(int type, Regexp *left, Regexp *right);
void printre(Regexp*);
// re1_5_fatal is declared as a function only when the including code has not
// already defined a macro of that name.
// NOTE(review): presumably reports an unrecoverable error -- confirm.
#ifndef re1_5_fatal
void re1_5_fatal(char*);
#endif
// re1_5_stack_chk() expands to nothing unless the including code defined its
// own macro of that name before this point.
#ifndef re1_5_stack_chk
#define re1_5_stack_chk()
#endif
// NOTE(review): presumably an allocation helper taking a size in bytes;
// the definition and its failure behavior are not visible here -- confirm.
void *mal(int);
// A program expressed as Inst records (see struct Inst).
struct Prog
{
// NOTE(review): presumably points at the first Inst of an array -- confirm.
Inst *start;
// NOTE(review): presumably the number of Inst records at start -- confirm.
int len;
};
// A program in bytecode form: a fixed header followed directly in memory by
// the instruction bytes.
struct ByteProg
{
    // NOTE(review): presumably the number of bytes stored in insts -- confirm.
    int bytelen;
    // NOTE(review): presumably the number of encoded instructions -- confirm.
    int len;
    // NOTE(review): presumably the number of capture slots used -- confirm.
    int sub;
    // Instruction bytes. Declared as a C99 flexible array member instead of
    // the GNU zero-length array `insts[0]`. sizeof(struct ByteProg) covers
    // only the header, so allocations must add room for the bytes.
    char insts[];
};
// One instruction of a Prog.
struct Inst
{
// One of the Inst.opcode enum constants.
int opcode;
// NOTE(review): presumably the character operand of Char -- confirm.
int c;
// NOTE(review): presumably the slot number operand of Save -- confirm.
int n;
// Links to other instructions. NOTE(review): presumably the targets of
// Jmp/Split-style instructions -- confirm.
Inst *x;
Inst *y;
int gen; // global state, oooh!
};
// Opcode values are grouped into numeric ranges so that the group of an
// opcode can be tested arithmetically (see the macros after the enum).
enum /* Inst.opcode */
{
    // Instructions which consume input bytes (and thus fail if none left)
    CONSUMERS = 1,
    Char = CONSUMERS,
    Any,
    Class,
    ClassNot,
    NamedClass,

    // Assertion group, starting at 0x50.
    // NOTE(review): presumably begin-of-line / end-of-line checks -- confirm.
    ASSERTS = 0x50,
    Bol = ASSERTS,
    Eol,

    // Instructions which take relative offset as arg
    JUMPS = 0x60,
    Jmp = JUMPS,
    Split,
    RSplit,

    // Other (special) instructions
    Save = 0x7e,
    Match = 0x7f,
};

// True for opcode values below ASSERTS (0x50), i.e. the CONSUMERS group.
#define inst_is_consumer(inst) ((inst) < ASSERTS)
// True when bits 4-6 of the opcode equal JUMPS (0x60..0x6f for 7-bit
// opcodes). The mask must be parenthesized: `==` binds tighter than `&`, so
// without the inner parentheses the test becomes `inst & 0`, always false.
#define inst_is_jump(inst) (((inst) & 0x70) == JUMPS)
// Prog entry points; the definitions are not in this header.
// NOTE(review): presumably compile() turns a Regexp tree into a Prog and
// printprog() dumps one for debugging -- confirm.
Prog *compile(Regexp*);
void printprog(Prog*);
// Global counter defined in another translation unit.
// NOTE(review): presumably paired with Inst.gen -- confirm.
extern int gen;
// Capacity of the sub[] array in struct Sub.
enum {
MAXSUB = 20
};
typedef struct Sub Sub;
// A fixed-capacity set of pointers into const character data.
struct Sub
{
// NOTE(review): presumably a reference count maintained by incref()/decref()
// -- confirm.
int ref;
// NOTE(review): presumably the number of sub[] entries in use -- confirm.
int nsub;
// Up to MAXSUB recorded positions.
const char *sub[MAXSUB];
};
// Sub helpers; the definitions are not in this header.
// NOTE(review): the names suggest reference-counted handling -- newsub()
// creating a Sub for n entries, incref()/decref() adjusting Sub.ref, copy()
// duplicating, update() storing a pointer at an index. Confirm ownership and
// return-value rules against the implementations.
Sub *newsub(int n);
Sub *incref(Sub*);
Sub *copy(Sub*);
Sub *update(Sub*, int, const char*);
void decref(Sub*);
// The text passed to the matcher functions, described by three pointers into
// const character data.
struct Subject {
// NOTE(review): presumably the start of the line containing begin, so
// begin-of-line checks work when begin is mid-line -- confirm.
const char *begin_line;
// NOTE(review): presumably where matching starts -- confirm.
const char *begin;
// NOTE(review): presumably one past the last character -- confirm.
const char *end;
};
// Offset that HANDLE_ANCHORED adds to the bytecode pointer for anchored use.
#define NON_ANCHORED_PREFIX 5
// Yields bytecode unchanged when is_anchored is zero; otherwise yields
// bytecode advanced by NON_ANCHORED_PREFIX.
#define HANDLE_ANCHORED(bytecode, is_anchored) \
    (!(is_anchored) ? (bytecode) : (bytecode) + NON_ANCHORED_PREFIX)
// NOTE(review): presumably a marker value used inside class bytecode to flag
// a named class -- confirm against the compiler and class matcher.
#define RE15_CLASS_NAMED_CLASS_INDICATOR 0
// Matcher entry points. All five share one signature, so they can be used
// interchangeably by a caller.
// NOTE(review): parameter meanings are not visible in this header --
// presumably (program, subject text, capture output array, number of capture
// slots, is_anchored flag), returning non-zero on a match. Confirm against
// the implementations.
int re1_5_backtrack(ByteProg*, Subject*, const char**, int, int);
int re1_5_pikevm(ByteProg*, Subject*, const char**, int, int);
int re1_5_recursiveloopprog(ByteProg*, Subject*, const char**, int, int);
int re1_5_recursiveprog(ByteProg*, Subject*, const char**, int, int);
int re1_5_thompsonvm(ByteProg*, Subject*, const char**, int, int);
// Bytecode construction and inspection; the definitions are not in this
// header.
// NOTE(review): presumably re1_5_sizecode() returns the space needed for the
// pattern `re`, and re1_5_compilecode() fills a caller-allocated ByteProg of
// at least that size. Confirm return values and error signalling.
int re1_5_sizecode(const char *re);
int re1_5_compilecode(ByteProg *prog, const char *re);
void re1_5_dumpcode(ByteProg *prog);
void cleanmarks(ByteProg *prog);
// Character-class helpers taking a bytecode position (pc) and a subject
// position (sp).
// NOTE(review): presumably return non-zero when the character at sp belongs
// to the class encoded at pc -- confirm.
int _re1_5_classmatch(const char *pc, const char *sp);
int _re1_5_namedclassmatch(const char *pc, const char *sp);
#endif /*_RE1_5_REGEXP__H*/
|