1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232
|
/* classes: h_files */
#ifndef RXNFAH
#define RXNFAH
/* Copyright (C) 1995, 1996 Tom Lord
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU Library General Public License as published by
* the Free Software Foundation; either version 2, or (at your option)
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Library General Public License for more details.
*
* You should have received a copy of the GNU Library General Public License
* along with this software; see the file COPYING. If not, write to
* the Free Software Foundation, 59 Temple Place - Suite 330,
* Boston, MA 02111-1307, USA.
*/
/*
* Tom Lord (lord@cygnus.com, lord@gnu.ai.mit.edu)
*/
#include "_rx.h"
#include "rxnode.h"
/* NFA
*
* A syntax tree is compiled into an NFA. This page defines the structure
* of that NFA.
*/
struct rx_nfa_state
{
/* These are kept in a list as the NFA is being built.
* Here is the link.
*/
struct rx_nfa_state *next;
/* After the NFA is built, states are given integer id's.
* States whose outgoing transitions are all either epsilon or
* side effect edges are given ids less than 0. Other states
* are given successive non-negative ids starting from 0.
*
* Here is the id for this state:
*/
int id;
/* The list of NFA edges that go from this state to some other. */
struct rx_nfa_edge *edges;
/* If you land in this state, then you implicitly land
* in all other states reachable by only epsilon translations.
* Call the set of maximal loop-less paths to such states the
* epsilon closure of this state.
*
* There may be other states that are reachable by a mixture of
* epsilon and side effect edges. Consider the set of maximal loop-less
* paths of that sort from this state. Call it the epsilon-side-effect
* closure of the state.
*
* The epsilon closure of the state is a subset of the epsilon-side-
* effect closure. It consists of all the paths that contain
* no side effects -- only epsilon edges.
*
* The paths in the epsilon-side-effect closure can be partitioned
* into equivalance sets. Two paths are equivalant if they have the
* same set of side effects, in the same order. The epsilon-closure
* is one of these equivalance sets. Let's call these equivalance
* sets: observably equivalant path sets. That name is chosen
* because equivalance of two paths means they cause the same side
* effects -- so they lead to the same subsequent observations other
* than that they may wind up in different target states.
*
* The superstate nfa, which is derived from this nfa, is based on
* the observation that all of the paths in an observably equivalant
* path set can be explored at the same time, provided that the
* matcher keeps track not of a single nfa state, but of a set of
* states. In particular, after following all the paths in an
* observably equivalant set, you wind up at a set of target states.
* That set of target states corresponds to one state in the
* superstate NFA.
*
* Staticly, before matching begins, it is convenient to analyze the
* nfa. Each state is labeled with a list of the observably
* equivalant path sets who's union covers all the
* epsilon-side-effect paths beginning in this state. This list is
* called the possible futures of the state.
*
* A trivial example is this NFA:
* s1
* A ---> B
*
* s2
* ---> C
*
* epsilon s1
* ---------> D ------> E
*
*
* In this example, A has two possible futures.
* One invokes the side effect `s1' and contains two paths,
* one ending in state B, the other in state E.
* The other invokes the side effect `s2' and contains only
* one path, landing in state C.
*
* Here is a list of the possible futures of this state:
*/
struct rx_possible_future *futures;
int futures_computed:1;
/* There is exactly one distinguished starting state in every NFA: */
unsigned int is_start:1;
int has_cset_edges:1;
/* There may be many final states if the "cut" operator was used.
* each will have a different non-0 value for this field:
*/
int is_final;
/* These are used internally during NFA construction... */
unsigned int eclosure_needed:1;
unsigned int mark:1;
};
/* An edge in an NFA is typed:
*/
enum rx_nfa_etype
{
/* A cset edge is labled with a set of characters one of which
* must be matched for the edge to be taken.
*/
ne_cset,
/* An epsilon edge is taken whenever its starting state is
* reached.
*/
ne_epsilon,
/* A side effect edge is taken whenever its starting state is
* reached. Side effects may cause the match to fail or the
* position of the matcher to advance.
*/
ne_side_effect
};
struct rx_nfa_edge
{
struct rx_nfa_edge *next;
enum rx_nfa_etype type;
struct rx_nfa_state *dest;
union
{
rx_Bitset cset;
void * side_effect;
} params;
};
/* A possible future consists of a list of side effects
* and a set of destination states. Below are their
* representations. These structures are hash-consed so
* that lists with the same elements share a representation
* (their addresses are ==).
*/
struct rx_nfa_state_set
{
struct rx_nfa_state * car;
struct rx_nfa_state_set * cdr;
};
struct rx_se_list
{
void * car;
struct rx_se_list * cdr;
};
struct rx_possible_future
{
struct rx_possible_future *next;
struct rx_se_list * effects;
struct rx_nfa_state_set * destset;
};
#ifdef __STDC__
extern struct rx_nfa_state * rx_nfa_state (struct rx *rx);
extern struct rx_nfa_edge * rx_nfa_edge (struct rx *rx,
enum rx_nfa_etype type,
struct rx_nfa_state *start,
struct rx_nfa_state *dest);
extern int rx_build_nfa (struct rx *rx,
struct rexp_node *rexp,
struct rx_nfa_state **start,
struct rx_nfa_state **end);
extern struct rx_possible_future * rx_state_possible_futures (struct rx * rx, struct rx_nfa_state * n);
extern void rx_free_nfa (struct rx *rx);
#else /* STDC */
extern struct rx_nfa_state * rx_nfa_state ();
extern struct rx_nfa_edge * rx_nfa_edge ();
extern int rx_build_nfa ();
extern struct rx_possible_future * rx_state_possible_futures ();
extern void rx_free_nfa ();
#endif /* STDC */
#endif /* RXNFAH */
|