File: ulex.ml

package info (click to toggle)
ulex0.8 0.8-6
  • links: PTS, VCS
  • area: main
  • in suites: lenny
  • size: 184 kB
  • ctags: 264
  • sloc: ml: 975; sh: 96; makefile: 48
file content (134 lines) | stat: -rwxr-xr-x 3,309 bytes parent folder | download | duplicates (8)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
(* NFA *)

type node = { 
  id : int; 
  mutable eps : node list; 
  mutable trans : (Cset.t * node) list;
}

(* Compilation regexp -> NFA *)

type regexp = node -> node

let cur_id = ref 0
let new_node () =
  incr cur_id;
  { id = !cur_id; eps = []; trans = [] }

let seq r1 r2 succ = r1 (r2 succ)

let alt r1 r2 succ =
  let n = new_node () in
  n.eps <- [r1 succ; r2 succ];
  n

let rep r succ =
  let n = new_node () in
  n.eps <- [r n; succ];
  n

let plus r succ =
  let n = new_node () in
  let nr = r n in
  n.eps <- [nr; succ];
  nr

let eps succ = succ

let chars c succ =
  let n = new_node () in
  n.trans <- [c,succ];
  n

let compile_re re =
  let final = new_node () in
  (re final, final)

(* Determinization *)

type state = node list

let rec add_node state node = 
  if List.memq node state then state else add_nodes (node::state) node.eps
and add_nodes state nodes =
  List.fold_left add_node state nodes


let transition state =
  (* Merge transition with the same target *)
  let rec norm = function
    | (c1,n1)::((c2,n2)::q as l) ->
	if n1 == n2 then norm ((Cset.union c1 c2,n1)::q)
	else (c1,n1)::(norm l)
    | l -> l in
  let t = List.concat (List.map (fun n -> n.trans) state) in
  let t = norm (List.sort (fun (c1,n1) (c2,n2) -> n1.id - n2.id) t) in

  (* Split char sets so as to make them disjoint *)
  let rec split (all,t) ((c0 : Cset.t),n0) = 
    let t = 
      [(Cset.difference c0 all, [n0])] @
      List.map (fun (c,ns) -> (Cset.intersection c c0, n0::ns)) t @
      List.map (fun (c,ns) -> (Cset.difference c c0, ns)) t in
    (Cset.union all c0,
    List.filter (fun (c,ns) -> not (Cset.is_empty c)) t) in

  let (_,t) = List.fold_left split (Cset.empty,[]) t in

  (* Epsilon closure of targets *)
  let t = List.map (fun (c,ns) -> (c,add_nodes [] ns)) t in

  (* Canonical ordering *)
  let t = Array.of_list t in
  Array.sort (fun (c1,ns1) (c2,ns2) -> compare c1 c2) t;
  Array.map fst t, Array.map snd t

let find_alloc tbl counter x =
  try Hashtbl.find tbl x
  with Not_found ->
    let i = !counter in
    incr counter;
    Hashtbl.add tbl x i;
    i
 
let part_tbl = Hashtbl.create 31
let part_id = ref 0
let get_part (t : Cset.t array) = find_alloc part_tbl part_id t


let compile rs =
  let rs = Array.map compile_re rs in
  let counter = ref 0 in
  let states = Hashtbl.create 31 in
  let states_def = ref [] in
  let rec aux state =
    try Hashtbl.find states state
    with Not_found ->
      let i = !counter in
      incr counter;
      Hashtbl.add states state i;
      let (part,targets) = transition state in
      let part = get_part part in
      let targets = Array.map aux targets in
      let finals = Array.map (fun (_,f) -> List.mem f state) rs in
      states_def := (i, (part,targets,finals)) :: !states_def;
      i
  in
  let init = ref [] in
  Array.iter (fun (i,_) -> init := add_node !init i) rs;
  ignore (aux !init);
  Array.init !counter (fun id -> List.assoc id !states_def)
  
let partitions () =
  let aux part =
    let seg = ref [] in
    Array.iteri
      (fun i c -> 
	 List.iter (fun (a,b) -> seg := (a,b,i) :: !seg) c)
      part;
     List.sort (fun (a1,_,_) (a2,_,_) -> compare a1 a2) !seg in
  let res = ref [] in
  Hashtbl.iter (fun part i -> res := (i, aux part) :: !res) part_tbl;
  Hashtbl.clear part_tbl;
  !res