1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232
|
(**************************************************************************)
(* *)
(* OCaml *)
(* *)
(* Xavier Leroy, projet Cristal, INRIA Rocquencourt *)
(* *)
(* Copyright 1996 Institut National de Recherche en Informatique et *)
(* en Automatique. *)
(* *)
(* All rights reserved. This file is distributed under the terms of *)
(* the GNU Lesser General Public License version 2.1, with the *)
(* special exception on linking described in the file LICENSE. *)
(* *)
(**************************************************************************)
(* Compaction of an automaton *)
open Lexgen
(* Code for memory actions *)
(* Global table that accumulates every integer emitted through [emit_int].
   [do_emit_code] uses its current size as the address of the next piece
   of code, and [compact_tables] trims it to produce [tbl_code]. *)
let code = Table.create 0
(* Each instruction is a pair of 8-bit integers; a 0xff byte means return *)
(* Append the integer [n] at the end of the global [code] table. *)
let emit_int n = n |> Table.emit code
(* Prepend the two-integer encoding of memory instruction [instr] to the
   already encoded tail [rest].  [Copy (dst, src)] is encoded as
   [dst; src]; [Set dst] is encoded as [dst; 0xff]. *)
let ins_mem instr rest =
  let first, second =
    match instr with
    | Copy (dst, src) -> dst, src
    | Set dst -> dst, 0xff
  in
  first :: second :: rest
(* Prepend the two-integer encoding of tag instruction [instr] to the
   already encoded tail [rest].  [SetTag (dst, src)] is encoded as
   [dst; src]; [EraseTag dst] is encoded as [dst; 0xff]. *)
let ins_tag instr rest =
  let first, second =
    match instr with
    | SetTag (dst, src) -> dst, src
    | EraseTag dst -> dst, 0xff
  in
  first :: second :: rest
(* Append the integers of [c] to the [code] table, followed by the 0xff
   terminator, and return the address (the table size before emission) at
   which this piece of code starts. *)
let do_emit_code c =
  let start_addr = Table.size code in
  c |> List.iter emit_int;
  emit_int 0xff;
  start_addr
(* Cache used by [mem_emit_code]: maps an encoded instruction list to the
   address at which [do_emit_code] emitted it, so that a given list is
   emitted only once. *)
let memory = Hashtbl.create 101
(* Return the code address of the encoded instruction list [c].
   The first request for a given list emits it with [do_emit_code] and
   records the address in [memory]; later requests reuse that address. *)
let mem_emit_code c =
  match Hashtbl.find memory c with
  | addr -> addr
  | exception Not_found ->
      let addr = do_emit_code c in
      Hashtbl.add memory c addr;
      addr
(* Code address 0 is the empty code (i.e. do nothing): emitting the empty
   instruction list first, while the [code] table is still empty, gives it
   address 0.  The returned address is deliberately discarded. *)
let (_ : int) = mem_emit_code []
(* Encode the tag instruction list [c] with [ins_tag] and return the
   address of the corresponding (possibly shared) code. *)
let emit_tag_code c =
  let encoded = List.fold_right ins_tag c [] in
  mem_emit_code encoded

(* Encode the memory instruction list [c] with [ins_mem] and return the
   address of the corresponding (possibly shared) code. *)
let emit_mem_code c =
  let encoded = List.fold_right ins_mem c [] in
  mem_emit_code encoded
(*******************************************)
(* Compact the transition and check arrays *)
(*******************************************)
(* Return the element occurring most frequently in array [v].
   When several elements are tied, the one whose count reaches the
   maximum first (scanning left to right) wins.  [v] must be non-empty:
   [v.(0)] is read unconditionally. *)
let most_frequent_elt v =
  let counts = Hashtbl.create 17 in
  let best_count = ref 0 in
  let best_elt = ref v.(0) in
  let visit elt =
    let cell =
      match Hashtbl.find counts elt with
      | known -> known
      | exception Not_found ->
          (* Every counter starts at 1 and is incremented right below, so
             all counts carry the same +1 offset; only their relative
             order matters. *)
          let fresh = ref 1 in
          Hashtbl.add counts elt fresh;
          fresh
    in
    incr cell;
    if !cell > !best_count then begin
      best_count := !cell;
      best_elt := elt
    end
  in
  Array.iter visit v;
  !best_elt
(* Transform array [v] into the list of [(position, element)] pairs for
   the elements that differ from [def], in increasing position order. *)
let non_default_elements def v =
  let pairs = ref [] in
  (* Walk backwards so that consing yields positions in increasing order. *)
  for pos = Array.length v - 1 downto 0 do
    let elt = v.(pos) in
    if elt <> def then pairs := (pos, elt) :: !pairs
  done;
  !pairs
(* Growable pair of parallel arrays into which [do_pack] packs the
   non-default entries of several rows.  For a row packed at base [b], the
   entry at position [pos] lives at index [b + pos] of both arrays. *)
type t_compact =
{mutable c_trans : int array ; (* packed values; unused slots hold 0 *)
mutable c_check : int array ; (* state number owning each slot; -1 = free *)
mutable c_last_used : int ; } (* length of the prefix that may be in use *)
(* Build an empty compacted table: 1024 slots, all values 0, all check
   entries -1 (free), and nothing in use yet. *)
let create_compact () =
  let initial_size = 1024 in
  let c_trans = Array.make initial_size 0 in
  let c_check = Array.make initial_size (-1) in
  { c_trans; c_check; c_last_used = 0 }
(* Bring the compacted table [c] back to its initial empty state by
   installing fresh 1024-slot arrays (values 0, checks -1) and clearing
   the used-prefix length. *)
let reset_compact c =
  let blank_trans = Array.make 1024 0 in
  let blank_check = Array.make 1024 (-1) in
  c.c_trans <- blank_trans;
  c.c_check <- blank_check;
  c.c_last_used <- 0
(* Two global compacted tables: [trans] receives the transitions and
   [moves] receives the memory-action code addresses. *)
let trans = create_compact ()

let moves = create_compact ()
(* Double the capacity of compacted table [c].  The new size is twice the
   current length of [c_trans] for both arrays; only the first
   [c_last_used] slots are copied, the remaining slots get the initial
   values (0 for values, -1 for checks). *)
let grow_compact c =
  let prev_trans = c.c_trans and prev_check = c.c_check in
  let doubled = 2 * Array.length prev_trans in
  let used = c.c_last_used in
  let fresh_trans = Array.make doubled 0 in
  c.c_trans <- fresh_trans;
  Array.blit prev_trans 0 fresh_trans 0 used;
  let fresh_check = Array.make doubled (-1) in
  c.c_check <- fresh_check;
  Array.blit prev_check 0 fresh_check 0 used
(* Pack the row [orig] of state [state_num] into the compacted table
   [compact] and return [(base, default)].

   [default] is the most frequent element of [orig]; only the other
   elements are stored.  [base] is the smallest offset, searched upwards
   from 0, at which every non-default position of the row falls on a free
   slot ([c_check] = -1).  The table is grown as needed so that a window
   of 257 slots starting at the candidate base always fits. *)
let do_pack state_num orig compact =
  let default = most_frequent_elt orig in
  let nondef = non_default_elements default orig in
  (* Make sure slots [b .. b + 256] exist before probing them. *)
  let ensure_room b =
    while b + 257 > Array.length compact.c_trans do
      grow_compact compact
    done
  in
  (* True when every non-default position is free at candidate base [b]. *)
  let fits_at b =
    List.for_all (fun (pos, _v) -> compact.c_check.(b + pos) = -1) nondef
  in
  let candidate = ref 0 in
  ensure_room !candidate;
  while not (fits_at !candidate) do
    incr candidate;
    ensure_room !candidate
  done;
  let base = !candidate in
  let store (pos, v) =
    compact.c_trans.(base + pos) <- v;
    compact.c_check.(base + pos) <- state_num
  in
  List.iter store nondef;
  let window_end = base + 257 in
  if window_end > compact.c_last_used then
    compact.c_last_used <- window_end;
  (base, default)
(* Pack the 257 moves [move_t] of state [state_num].

   Each move is a pair (action, memory instructions).  The action becomes
   an integer target (-1 for [Backtrack], [n] for [Goto n]) packed into
   the global [trans] table; the memory instructions are turned into a
   code address with [emit_mem_code] and packed into the global [moves]
   table.  Returns the two [(base, default)] pairs, transitions first. *)
let pack_moves state_num move_t =
  let targets = Array.make 257 0 in
  let code_addrs = Array.make 257 0 in
  for i = 0 to 256 do
    let (act, mem) = move_t.(i) in
    let target =
      match act with
      | Backtrack -> -1
      | Goto next -> next
    in
    targets.(i) <- target;
    code_addrs.(i) <- emit_mem_code mem
  done;
  let packed_trans = do_pack state_num targets trans in
  let packed_moves = do_pack state_num code_addrs moves in
  (packed_trans, packed_moves)
(* Build the tables *)
(* Result of [compact_tables].  The first five arrays describe the
   transitions; the remaining ones describe the memory-action code and are
   all empty when [compact_tables] finds no code beyond the empty one. *)
type lex_tables =
{ tbl_base: int array; (* Perform / Shift *)
(* For [Perform (n, _)] the entry is [-(n+1)]; for [Shift] it is the base
   of the state's row in [tbl_trans]. *)
tbl_backtrk: int array; (* No_remember / Remember *)
(* Entry is -1 unless the state is [Shift (Remember (n, _), _)], in which
   case it is [n]. *)
tbl_default: int array; (* Default transition *)
tbl_trans: int array; (* Transitions (compacted) *)
tbl_check: int array; (* Check (compacted) *)
(* code addresses are managed in a similar fashion as transitions *)
tbl_base_code : int array; (* code ptr / base for Shift *)
tbl_backtrk_code : int array; (* nothing / code when Remember *)
(* moves to execute before transitions (compacted) *)
tbl_default_code : int array;
tbl_trans_code : int array;
tbl_check_code : int array;
(* byte code itself *)
tbl_code: int array;}
(* Build the lexer tables for the automaton states [state_v].

   For every state [i]:
   - [Perform (action, tags)]: [base.(i)] is [-(action+1)] and
     [base_code.(i)] is the code address of [tags];
   - [Shift (remember, move)]: a [Remember (action, tags)] fills the
     backtrack entries, then [pack_moves] packs the moves and provides the
     base/default entries for both transitions and code.

   When the trimmed code table holds at most one element (no memory
   moves), all code-related tables are returned empty.  The global
   [trans] and [moves] tables are reset before returning. *)
let compact_tables state_v =
  let n_states = Array.length state_v in
  let base = Array.make n_states 0 in
  let backtrk = Array.make n_states (-1) in
  let default = Array.make n_states 0 in
  let base_code = Array.make n_states 0 in
  let backtrk_code = Array.make n_states 0 in
  let default_code = Array.make n_states 0 in
  let fill i = function
    | Perform (action, tags) ->
        base.(i) <- -(action + 1);
        base_code.(i) <- emit_tag_code tags
    | Shift (remember, move) ->
        (match remember with
         | No_remember -> ()
         | Remember (action, tags) ->
             backtrk.(i) <- action;
             backtrk_code.(i) <- emit_tag_code tags);
        let (b_trans, d_trans), (b_moves, d_moves) = pack_moves i move in
        base.(i) <- b_trans;
        default.(i) <- d_trans;
        base_code.(i) <- b_moves;
        default_code.(i) <- d_moves
  in
  Array.iteri fill state_v;
  let code = Table.trim code in
  (* Tables as emitted when there are no memory moves: every code-related
     table is empty. *)
  let without_code =
    { tbl_base = base;
      tbl_backtrk = backtrk;
      tbl_default = default;
      tbl_trans = Array.sub trans.c_trans 0 trans.c_last_used;
      tbl_check = Array.sub trans.c_check 0 trans.c_last_used;
      tbl_base_code = [||];
      tbl_backtrk_code = [||];
      tbl_default_code = [||];
      tbl_trans_code = [||];
      tbl_check_code = [||];
      tbl_code = [||] }
  in
  let tables =
    if Array.length code > 1 then
      { without_code with
        tbl_base_code = base_code;
        tbl_backtrk_code = backtrk_code;
        tbl_default_code = default_code;
        tbl_trans_code = Array.sub moves.c_trans 0 moves.c_last_used;
        tbl_check_code = Array.sub moves.c_check 0 moves.c_last_used;
        tbl_code = code }
    else without_code
  in
  reset_compact trans;
  reset_compact moves;
  tables
|