1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134
|
(* NFA *)
type node = {
id : int;
mutable eps : node list;
mutable trans : (Cset.t * node) list;
}
(* Compilation regexp -> NFA *)
type regexp = node -> node
let cur_id = ref 0
let new_node () =
incr cur_id;
{ id = !cur_id; eps = []; trans = [] }
let seq r1 r2 succ = r1 (r2 succ)
let alt r1 r2 succ =
let n = new_node () in
n.eps <- [r1 succ; r2 succ];
n
let rep r succ =
let n = new_node () in
n.eps <- [r n; succ];
n
let plus r succ =
let n = new_node () in
let nr = r n in
n.eps <- [nr; succ];
nr
let eps succ = succ
let chars c succ =
let n = new_node () in
n.trans <- [c,succ];
n
let compile_re re =
let final = new_node () in
(re final, final)
(* Determinization *)
type state = node list
let rec add_node state node =
if List.memq node state then state else add_nodes (node::state) node.eps
and add_nodes state nodes =
List.fold_left add_node state nodes
let transition state =
(* Merge transition with the same target *)
let rec norm = function
| (c1,n1)::((c2,n2)::q as l) ->
if n1 == n2 then norm ((Cset.union c1 c2,n1)::q)
else (c1,n1)::(norm l)
| l -> l in
let t = List.concat (List.map (fun n -> n.trans) state) in
let t = norm (List.sort (fun (c1,n1) (c2,n2) -> n1.id - n2.id) t) in
(* Split char sets so as to make them disjoint *)
let rec split (all,t) ((c0 : Cset.t),n0) =
let t =
[(Cset.difference c0 all, [n0])] @
List.map (fun (c,ns) -> (Cset.intersection c c0, n0::ns)) t @
List.map (fun (c,ns) -> (Cset.difference c c0, ns)) t in
(Cset.union all c0,
List.filter (fun (c,ns) -> not (Cset.is_empty c)) t) in
let (_,t) = List.fold_left split (Cset.empty,[]) t in
(* Epsilon closure of targets *)
let t = List.map (fun (c,ns) -> (c,add_nodes [] ns)) t in
(* Canonical ordering *)
let t = Array.of_list t in
Array.sort (fun (c1,ns1) (c2,ns2) -> compare c1 c2) t;
Array.map fst t, Array.map snd t
let find_alloc tbl counter x =
try Hashtbl.find tbl x
with Not_found ->
let i = !counter in
incr counter;
Hashtbl.add tbl x i;
i
let part_tbl = Hashtbl.create 31
let part_id = ref 0
let get_part (t : Cset.t array) = find_alloc part_tbl part_id t
let compile rs =
let rs = Array.map compile_re rs in
let counter = ref 0 in
let states = Hashtbl.create 31 in
let states_def = ref [] in
let rec aux state =
try Hashtbl.find states state
with Not_found ->
let i = !counter in
incr counter;
Hashtbl.add states state i;
let (part,targets) = transition state in
let part = get_part part in
let targets = Array.map aux targets in
let finals = Array.map (fun (_,f) -> List.mem f state) rs in
states_def := (i, (part,targets,finals)) :: !states_def;
i
in
let init = ref [] in
Array.iter (fun (i,_) -> init := add_node !init i) rs;
ignore (aux !init);
Array.init !counter (fun id -> List.assoc id !states_def)
let partitions () =
let aux part =
let seg = ref [] in
Array.iteri
(fun i c ->
List.iter (fun (a,b) -> seg := (a,b,i) :: !seg) c)
part;
List.sort (fun (a1,_,_) (a2,_,_) -> compare a1 a2) !seg in
let res = ref [] in
Hashtbl.iter (fun part i -> res := (i, aux part) :: !res) part_tbl;
Hashtbl.clear part_tbl;
!res
|