File: tinyRope.ml

package info (click to toggle)
ocaml-rope 0.6.3-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 536 kB
  • sloc: ml: 1,715; sh: 33; makefile: 27
file content (365 lines) | stat: -rw-r--r-- 11,369 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
(* Rope: an implementation of the data structure described in

   Boehm, H., Atkinson, R., and Plass, M. 1995. Ropes: an alternative to
   strings.  Softw. Pract. Exper. 25, 12 (Dec. 1995), 1315-1330.

   Motivated by Luca de Alfaro's extensible array implementation Vec.

   Copyright (C) 2007   Mauricio Fernandez <mfp@acm.org>
                        http://eigenclass.org

   This library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2 of the License, or (at your option) any later version,
   with the following special exception:

   You may link, statically or dynamically, a "work that uses the
   Library" with a publicly distributed version of the Library to
   produce an executable file containing portions of the Library, and
   distribute that executable file under terms of your choice, without
   any of the additional requirements listed in clause 6 of the GNU
   Lesser General Public License.  By "a publicly distributed version
   of the Library", we mean either the unmodified Library as
   distributed by the author, or a modified version of the Library that is
   distributed under the conditions defined in clause 2 of the GNU
   Lesser General Public License.  This exception does not however
   invalidate any other reasons why the executable file might be
   covered by the GNU Lesser General Public License.

   This library is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   The GNU Lesser General Public License is available at
   http://www.gnu.org/copyleft/lgpl.html; to obtain it, you can also
   write to the Free Software Foundation, Inc., 51 Franklin St,
   Fifth Floor, Boston, MA 02110-1301, USA.
 *)

(* =begin ignore *)
type t =
    Empty
  (* left, left size, right, right size, height *)
  | Concat of t * int * t * int * int
  | Leaf of string


let height = function
    Empty | Leaf _ -> 0
  | Concat(_,_,_,_,h) -> h

(* For debugging and judging balancing *)
let print =
  let rec print prefix = function
    | Empty -> print_string "\n"
    | Leaf s ->
        print_string prefix;
        print_string s;
        print_string "\n"
    | Concat(l,_, r,_, _) ->
        let prefixl = prefix ^ if height l = 0 then "/" else " " in
        let prefixr = prefix ^ if height r = 0 then "\\" else " " in
        print prefixl l;
        print prefixr r;
  in print ""


type forest_element = { mutable c : t; mutable len : int }

let str_append = (^)
let string_of_string_list l = String.concat "" l

module STRING = String

(* 48 limits max rope size to 220GB on 64 bit,
 * ~ 700MB on 32bit (length fields overflow after that) *)
let max_height = 48

(* actual size will be that plus 1 word header;
 * the code assumes it's an even num.
 * 256 gives up to a 50% overhead in the worst case (all leaf nodes near
 * half-filled *)
let leaf_size = 256
(* let leaf_size = 128 *)
(* let leaf_size = 64 *)
(* let leaf_size = 32 *)

(* =end *)

(* =begin code *)

exception Out_of_bounds

let empty = Empty

(* by construction, there cannot be Empty or Leaf "" leaves *)
let is_empty = function Empty -> true | _ -> false


let length = function
    Empty -> 0
  | Leaf s -> STRING.length s
  | Concat(_,cl,_,cr,_) -> cl + cr

let make_concat l r =
  let hl = height l and hr = height r in
  let cl = length l and cr = length r in
    Concat(l, cl, r, cr, if hl >= hr then hl + 1 else hr + 1)

let min_len =
  let fib_tbl = Array.make max_height 0 in
  let rec fib n = match fib_tbl.(n) with
      0 ->
        let last = fib (n - 1) and prev = fib (n - 2) in
        let r = last + prev in
        let r = if r > last then r else last in (* check overflow *)
          fib_tbl.(n) <- r; r
    | n -> n
  in
    fib_tbl.(0) <- leaf_size + 1; fib_tbl.(1) <- 3 * leaf_size / 2 + 1;
    Array.init max_height (fun i -> if i = 0 then 1 else fib (i - 1))

let max_length = min_len.(Array.length min_len - 1)

let concat_fast l r = match l with
    Empty -> r
  | Leaf _ | Concat(_,_,_,_,_) ->
      match r with
          Empty -> l
        | Leaf _ | Concat(_,_,_,_,_) -> make_concat l r

(* based on Hans-J. Boehm's *)
let add_forest forest rope len =
  let i = ref 0 in
  let sum = ref empty in
  while len > min_len.(!i+1) do
    if forest.(!i).c <> Empty then begin
      sum := concat_fast forest.(!i).c !sum;
      forest.(!i).c <- Empty
    end;
    incr i
  done;
  sum := concat_fast !sum rope;
  let sum_len = ref (length !sum) in
  while !sum_len >= min_len.(!i) do
    if forest.(!i).c <> Empty then begin
      sum := concat_fast forest.(!i).c !sum;
      sum_len := !sum_len + forest.(!i).len;
      forest.(!i).c <- Empty;
    end;
    incr i
  done;
  decr i;
  forest.(!i).c <- !sum;
  forest.(!i).len <- !sum_len

let concat_forest forest =
  Array.fold_left (fun s x -> concat_fast x.c s) Empty forest

let rec balance_insert rope len forest = match rope with
    Empty -> ()
  | Leaf _ -> add_forest forest rope len
  | Concat(l,cl,r,cr,h) when h >= max_height || len < min_len.(h) ->
      balance_insert l cl forest;
      balance_insert r cr forest
  | x -> add_forest forest x len (* function or balanced *)

let balance r =
  match r with
      Empty -> Empty
    | Leaf _ -> r
    | _ ->
        let forest = Array.init max_height (fun _ -> {c = Empty; len = 0}) in
          balance_insert r (length r) forest;
          concat_forest forest

let bal_if_needed l r =
  let r = make_concat l r in
    if height r < max_height then r else balance r

let concat_str l = function
    Empty | Concat(_,_,_,_,_) -> invalid_arg "concat_str"
  | Leaf rs as r ->
      let lenr = STRING.length rs in
        match l with
          | Empty -> r
          | Leaf ls ->
              let slen = lenr + STRING.length ls in
                if slen <= leaf_size then Leaf (str_append ls rs)
                else make_concat l r (* height = 1 *)
          | Concat(ll, cll, Leaf lrs, clr, h) ->
              let slen = clr + lenr in
                if clr + lenr <= leaf_size then
                  Concat(ll, cll, Leaf (str_append lrs rs), slen, h)
                else
                  bal_if_needed l r
          | _ -> bal_if_needed l r

let append_char c r = concat_str r (Leaf (STRING.make 1 c))

let concat l = function
    Empty -> l
  | Leaf _ as r -> concat_str l r
  | Concat(Leaf rls,rlc,rr,rc,h) as r ->
      (match l with
          Empty -> r
        | Concat(_,_,_,_,_) -> bal_if_needed l r
        | Leaf ls ->
            let slen = rlc + STRING.length ls in
              if slen <= leaf_size then
                Concat(Leaf(str_append ls rls), slen, rr, rc, h)
              else
                bal_if_needed l r)
  | r -> (match l with Empty -> r | _ -> bal_if_needed l r)

let prepend_char c r = concat (Leaf (STRING.make 1 c)) r

let rec get i = function
    Empty -> raise Out_of_bounds
  | Leaf s ->
      if i >= 0 && i < STRING.length s then STRING.unsafe_get s i
      else raise Out_of_bounds
  | Concat (l, cl, r, _, _) ->
      if i < cl then get i l
      else get (i - cl) r

let rec getn i = function
    Empty -> raise Out_of_bounds
  | Leaf _ ->
      0
  | Concat (l, cl, r, _, _) ->
      if i < cl then 1 + getn i l else 1 + getn (i - cl) r


let rec set i v = function
    Empty -> raise Out_of_bounds
  | Leaf s ->
      if i >= 0 && i < STRING.length s then
        let s = Bytes.of_string s in
        Bytes.unsafe_set s i v;
        Leaf (Bytes.unsafe_to_string s)
      else raise Out_of_bounds
  | Concat(l, cl, r, _, _) ->
      if i < cl then concat (set i v l) r
      else concat l (set (i - cl) v r)

let of_string = function
    s when STRING.length s = 0 -> Empty
  | s ->
      let min (x:int) (y:int) = if x <= y then x else y in
      let rec loop r s len i =
        if i < len then (* len - i > 0, thus Leaf "" can't happen *)
          loop (concat r (Leaf (STRING.sub s i (min (len - i) leaf_size))))
            s len (i + leaf_size)
        else
          r
      in loop Empty s (STRING.length s) 0

let rec make len c =
  let rec concatloop len i r =
    if i <= len then
      concatloop len (i * 2) (concat r r)
    else r
  in
    if len = 0 then Empty
    else if len <= leaf_size then Leaf (STRING.make len c)
    else
      let rope = concatloop len 2 (of_string (STRING.make 1 c)) in
        concat rope (make (len - length rope) c)

let rec sub start len = function
    Empty -> if start <> 0 || len <> 0 then raise Out_of_bounds else Empty
  | Leaf s ->
      if len > 0 then (* Leaf "" cannot happen *)
        (try Leaf (STRING.sub s start len) with _ -> raise Out_of_bounds)
      else if len < 0 || start < 0 || start > STRING.length s then
        raise Out_of_bounds
      else Empty
  | Concat(l,cl,r,cr,_) ->
      if start < 0 || len < 0 || start + len > cl + cr then raise Out_of_bounds;
      let left =
        if start = 0 then
          if len >= cl then
            l
          else sub 0 len l
        else if start > cl then Empty
        else if start + len >= cl then
          sub start (cl - start) l
        else sub start len l in
      let right =
        if start <= cl then
          let upto = start + len in
            if upto = cl + cr then r
            else if upto < cl then Empty
            else sub 0 (upto - cl) r
        else sub (start - cl) len r
      in
        concat left right

let insert start rope r =
  concat (concat (sub 0 start r) rope) (sub start (length r - start) r)

let remove start len r =
  concat (sub 0 start r) (sub (start + len) (length r - start - len) r)

let to_string r =
  let rec strings l = function
      Empty -> l
    | Leaf s -> s :: l
    | Concat(left,_,right,_,_) -> strings (strings l right) left
  in
    string_of_string_list (strings [] r)

let rec iter f = function
    Empty -> ()
  | Leaf s -> STRING.iter f s
  | Concat(l,_,r,_,_) -> iter f l; iter f r

let iteri f r =
  let rec aux f i = function
    Empty -> ()
  | Leaf s ->
      for j = 0 to STRING.length s - 1 do
        f (i + j) (STRING.unsafe_get s j)
      done
  | Concat(l,cl,r,_,_) -> aux f i l; aux f (i + cl) r
  in
    aux f 0 r

let rec rangeiter f start len = function
    Empty -> if start <> 0 || len <> 0 then raise Out_of_bounds
  | Leaf s ->
      let n = start + len in
      let lens = STRING.length s in
      if start >= 0 && len >= 0 && n <= lens then
        for i = start to n - 1 do
          f (STRING.unsafe_get s i)
        done
      else raise Out_of_bounds
  | Concat(l,cl,r,cr,_) ->
      if start < 0 || len < 0 || start + len > cl + cr then raise Out_of_bounds;
      if start < cl then begin
        let upto = start + len in
          if upto <= cl then
            rangeiter f start len l
          else begin
            rangeiter f start (cl - start) l;
            rangeiter f 0 (upto - cl) r
          end
      end else begin
        rangeiter f (start - cl) len r
      end

let rec fold f a = function
    Empty -> a
  | Leaf s ->
      let acc = ref a in
        for i = 0 to STRING.length s - 1 do
          acc := f !acc (STRING.unsafe_get s i)
        done;
        !acc
  | Concat(l,_,r,_,_) -> fold f (fold f a l) r

(* =end *)