1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140
|
type terminals =
| Empty
| Terminals of Entry.t array
| Summary of Entry.t array
type node =
{ start : int
; len : int
; size : int
; terminals : terminals
; children : node array option
}
type t =
{ str : string
; t : node
}
let empty = { start = 0; len = 0; size = 0; children = None; terminals = Empty }
let empty () =
(* avoid ancient segfaulting on statically allocated values *)
Obj.obj @@ Obj.dup @@ Obj.repr empty
let size t = t.t.size
let minimum { t; _ } =
match t.terminals with
| Empty -> assert false
| Terminals arr | Summary arr -> arr.(0)
let array_find ~str chr arr =
let rec go i =
if i >= Array.length arr
then None
else begin
let node = arr.(i) in
if chr = str.[node.start - 1] then Some node else go (i + 1)
end
in
go 0
let array_find ~str chr = function
| None -> None
| Some arr -> array_find ~str chr arr
let lcp i_str i j_str j j_len =
let j_stop = j + j_len in
let rec go_lcp i j =
if i >= String.length i_str || j >= j_stop
then i
else begin
let i_chr, j_chr = i_str.[i], j_str.[j] in
if i_chr <> j_chr then i else go_lcp (i + 1) (j + 1)
end
in
let i' = go_lcp i j in
i' - i
let rec find ~str node pattern i =
if i >= String.length pattern
then Some node
else begin
match array_find ~str pattern.[i] node.children with
| None -> None
| Some child -> find_lcp ~str child pattern (i + 1)
end
and find_lcp ~str child pattern i =
let n = lcp pattern i str child.start child.len in
if i + n = String.length pattern
then Some { child with start = child.start + n; len = child.len - n }
else if n = child.len
then find ~str child pattern (i + n)
else None
let find t pattern =
match find_lcp ~str:t.str t.t pattern 0 with
| None -> None
| Some child -> Some { str = t.str; t = child }
let advance node =
assert (node.len >= 1) ;
{ node with start = node.start + 1; len = node.len - 1 }
let stepback node =
assert (node.len >= 0) ;
{ node with start = node.start - 1; len = node.len + 1 }
let rec find_skip ~spaces t pattern yield =
let skip () =
let node = t.t in
if node.len >= 1
then begin
let spaces = spaces + if t.str.[node.start] = ' ' then 1 else 0 in
if spaces > 1
then ()
else find_skip ~spaces { t with t = advance t.t } pattern yield
end
else begin
match node.children with
| None -> ()
| Some children ->
Array.iter
(fun child -> find_skip ~spaces { t with t = stepback child } pattern yield)
children
end
in
if spaces = 0
then skip ()
else if spaces = 1 && pattern = Type_polarity.poly
then begin
match find t pattern with
| None -> ()
| Some here -> yield here
end
else begin
skip () ;
match find t pattern with
| None -> ()
| Some here -> yield here
end
let find_star t pattern yield =
let rec go t = function
| [] -> yield t
| p :: ps -> find_skip ~spaces:0 t p @@ fun t -> go t ps
in
match String.split_on_char ' ' pattern with
| [] -> ()
| p :: ps -> begin
match find t p with
| None -> ()
| Some t -> go t ps
end
let find_star t pattern =
let found = ref [] in
find_star t pattern (fun t -> found := t :: !found) ;
!found
|