File: pdfio.ml

package info (click to toggle)
camlpdf 0.5-1
  • links: PTS, VCS
  • area: non-free
  • in suites: squeeze, wheezy
  • size: 1,516 kB
  • ctags: 2,689
  • sloc: ml: 18,229; ansic: 139; makefile: 139
file content (380 lines) | stat: -rw-r--r-- 11,068 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
(* \chaptertitle{PDFIo}{General Input and Output} *)
open Utility

(* We use 64-bit sized files as standard. *)
(*IF-OCAML*)
open LargeFile
(*ENDIF-OCAML*)

(* \section{Defining and creating input and output functions} *)

(*IF-OCAML*)
type pos = int64
let pos_succ = i64succ
let pos_pred = i64pred
let pos_max = i64max
let possub = i64sub
let posadd = i64add
let posofi x = i64ofi x
let postoi x = i64toi x
let postoi64 x = x
let posofi64 x = x
(*ENDIF-OCAML*)

(*i*)(*F#
type pos = int
let pos_succ = succ
let pos_pred = pred
let pos_max = max
let possub = ( - )
let posadd = ( + )
let posofi (x : int) = (x : pos)
let postoi (x : int) = (x : pos)
let postoi64 (x : pos) = (i64ofi x : int64)
let posofi64 (x : int64) = (i64toi x : pos)
F#*)(*i*)

let no_more = ~-1

(* \intf A general type for input functions. This allows paramaterization over
channels, strings, bigarrays etc. *)
type input =
  {pos_in : unit -> pos;
   seek_in : pos -> unit;
   input_char : unit -> char option;
   input_byte : unit -> int;
   in_channel_length : unit -> pos;
   set_offset : pos -> unit}

(* \intf A general type for output functions, allowing parameterisation as above. *)
type output =
  {pos_out : unit -> pos;
   seek_out : pos -> unit;
   output_char : char -> unit;
   output_byte : int -> unit;
   out_channel_length : unit -> pos}

(* \intf Create input functions from a channel. *)
let input_of_channel ch =
  let offset = ref (posofi 0) in
    {pos_in =
       (fun () -> possub (pos_in ch) !offset);
     seek_in =
       (fun x -> seek_in ch (posadd x !offset));
     input_char =
       (fun () ->
          try Some (input_char ch) with End_of_file -> dpr "3A"; None);
     input_byte =
       (fun () ->
          try input_byte ch with End_of_file -> dpr "3B"; no_more);
     in_channel_length =
       (fun () -> in_channel_length ch);
     set_offset =
       (fun o -> offset := o)}

(* \intf Create input functions from a [Utility.stream]. *)
let input_of_stream s =
  let input_int () =
    if s.pos > stream_size s.data - 1
      then
        begin
          s.pos <- s.pos + 1;
          no_more
        end
      else
        begin
          s.pos <- s.pos + 1;
          sget s.data (s.pos - 1)
        end
  in
    {pos_in =
       (fun () -> posofi s.pos);
     seek_in =
       (fun p ->
          s.pos <- postoi p);
     input_char =
       (fun () ->
         match input_int () with x when x = no_more -> None | s -> Some (char_of_int s));
     input_byte =
       input_int;
     in_channel_length =
       (fun () -> posofi (stream_size s.data));
     set_offset =
       (fun _ -> raise (Failure "set_offset: not implemented"))
    }

(* \intf Create input functions from a [Utility.bytestream]. *)
let input_of_bytestream b =
  input_of_stream {pos = 0; data = b}

let input_of_string s =
  input_of_bytestream (bytestream_of_string s)

(* \intf Output functions over channels *)
let output_of_channel ch =
  {pos_out = (fun () -> pos_out ch);
   seek_out = seek_out ch;
   output_char = (fun c -> output_byte ch (int_of_char c));
   output_byte = output_byte ch;
   out_channel_length = (fun () -> out_channel_length ch)}

(* \intf Output functions over streams. If data is written past the end of a stream,
we extend the stream to that point plus one-third of that (new) size. Note that
this has an implication upon mixing reading and writing: the stream will have
junk in the extended section and will be longer than that which has been
written. *)
let output_of_stream s =
  let highest_written = ref (posofi 0) in
    let output_int i =
      if s.pos > stream_size s.data - 1
        then
          let newstream = mkstream (s.pos * 2 - s.pos / 2) in
            for x = 0 to stream_size s.data - 1 do
              sset newstream x (sget s.data x)
            done;
            sset newstream s.pos i;
            highest_written := pos_max !highest_written (posofi s.pos);
            s.pos <- s.pos + 1;
            s.data <- newstream
        else
          begin
            highest_written := pos_max !highest_written (posofi s.pos);
            sset s.data s.pos i;
            s.pos <- s.pos + 1
          end
    in
        {pos_out =
           (fun () -> posofi s.pos);
         seek_out =
           (fun p -> s.pos <- postoi p);
         output_char =
           (fun c -> output_int (int_of_char c));
         output_byte =
           output_int;
         out_channel_length =
           (fun () -> pos_succ !highest_written)}

(* \section{Utility functions} *)

(* \intf Nudge forward one character. *)
let nudge i =
  ignore (i.input_byte ())

(* \intf Read one character behind the current position, and reposition ourselves on
that character. *)
let read_char_back i =
  let pos = i.pos_in () in
    i.seek_in (pos_pred pos);
    let chr = i.input_char () in
      i.seek_in (pos_pred pos);
      chr

(* \intf Go back one character in a file. *)
let rewind i =
  i.seek_in (pos_pred (i.pos_in ()))

let rewind2 i =
  i.seek_in (possub (i.pos_in ()) (posofi 2))

let rewind3 i =
  i.seek_in (possub (i.pos_in ()) (posofi 3))

(* \intf Read a character, leaving the position unchanged. *)
let peek_char i =
  let r = i.input_char () in
    rewind i; r

(* \intf Read a byte, leaving the position unchanged. *)
let peek_byte i =
  let r = i.input_byte () in
    rewind i; r

(* \intf Output a string. *)
let output_string o s =
  String.iter o.output_char s

(* \intf Make a bytestream of an input channel. *)
let bytestream_of_input_channel ch =
  let fi = input_of_channel ch in
    let size = postoi (fi.in_channel_length ()) in
      let s = mkstream size in
        for x = 1 to size do
          match fi.input_byte () with
          | b when b = no_more -> failwith "channel length inconsistent"
          | b -> sset s (x - 1) b
        done;
        s

(* \intf Save a bytestream to a channel. *)
let bytestream_to_output_channel ch data =
  for x = 1 to stream_size data do
    output_byte ch (sget data (x - 1))
  done

(* Like [Pervasives.read_line] *) 
let read_line i =
  (* Raise EndOfInput if at end *)
  begin match i.input_byte () with
  | x when x = no_more -> dpr "O"; raise End_of_file;
  | _ -> ()
  end;
  rewind i;
  (* Read characters whilst <> newline or until end of input *)
  let rec read_chars prev =
    match i.input_byte () with
    | x when x = no_more -> rev prev
    | x when char_of_int x = '\n' -> rev ('\n'::prev)
    | x -> read_chars (char_of_int x::prev)
  in
    implode (read_chars [])

(* \section{Reading MSB-first Bit streams} *)

(*\intf The type of bit (MSB first) streams. *)
type bitstream =
  {input : input; (* The input from which bits are taken. It is advanced a byte at a time *)
   mutable currbyte : int; (* Current byte value from input *)
   mutable bit : int; (* Mask for getting the next bit (128, 64,... 2, 1 or 0 = none left) *)
   mutable bitsread : int (* A count of the number of bits read since inception. Debug use only *)}

(* \intf Make a [bitstream] from an [input]. *) 
let bitstream_of_input i =
  {currbyte = 0;
   bit = 0;
   bitsread = 0;
   input = i}

(* For debug only.... *)
let input_in_bitstream b =
  b.input

(* \intf Get a single bit. *)
let rec getbit b =
  if b.bit = 0 then
    begin
      b.currbyte <-
        begin match b.input.input_byte () with
        | x when x = no_more -> dpr "P"; raise End_of_file
        | x -> x
        end;
      b.bit <- 128;
      getbit b
    end
  else
    let r = b.currbyte land b.bit > 0 in
      b.bitsread <- b.bitsread + 1;
      b.bit <- b.bit / 2;
      r

(* \intf Get a bit as an integer, set = 1, unset = 0 *)
let getbitint i =
  if getbit i then 1 else 0

(* \intf Align on a byte boundary. *)
let align b =
  if b.bit > 0 then b.bitsread <- (b.bitsread / 8 + 1)  * 8;
  b.bit <- 0

(* Get [n] (up to 32) bits from [b], returned as an [int32], taken highest bit
first. Getting 0 bits gets the value 0.\SPEED{Far too slow}. *)
let char_of_bool = function true -> '1' | false -> '0'

let getval_32 b n =
  if n < 0 then raise (Invalid_argument "Io.getval_32 - n < 0") else
    if n = 0 then 0l else
      let bits = manyunique (mkunit getbit b) n in
        Int32.of_string ("0b" ^ implode (map char_of_bool bits))

(* \section{Writing MSB-first bit streams} *)

(* The type: A current byte, the position in the byte (0 = nothing in it, 7 =
almost full), and the list (in reverse order) of full bytes so far *)
type bitstream_write =
  {mutable wcurrbyte : int;
   mutable wbit : int;
   mutable bytes : int list}

let make_write_bitstream () =
  {wcurrbyte = 0;
   wbit = 0;
   bytes = []}

let copy_write_bitstream b =
  let b' = make_write_bitstream () in
    b'.wcurrbyte <- b.wcurrbyte;
    b'.wbit <- b.wbit;
    b'.bytes <- b.bytes;
    b'

let print_bitstream b =
  Printf.printf "wcurrbyte = %i, wbit = %i, %i bytes output\n"
  b.wcurrbyte b.wbit (length b.bytes)

(* Put a single bit into bitstream [b]*)
let putbit b bit =
  assert (bit = 0 || bit = 1);
  match b.wbit with
  | 7 ->
      b.bytes <- (b.wcurrbyte lor bit) :: b.bytes;
      b.wbit <- 0;
      b.wcurrbyte <- 0
  | _ ->
      b.wbit <- b.wbit + 1;
      b.wcurrbyte <- b.wcurrbyte lor (bit lsl (8 - b.wbit))

let putbool b bit =
  putbit b ((function false -> 0 | true -> 1) bit)

(* Put a multi-bit value [n] of bits [bs] (given as an [int32]) into bitstream [b]. *)
let rec putval b bs n =
  if bs < 0 || bs > 32 then raise (Invalid_argument "putval");
  match bs with
  | 0 -> ()
  | _ ->
      let bit =
        if land32 n (i32ofi (1 lsl (bs - 1))) > 0l then 1 else 0
      in
        putbit b bit;
        putval b (bs - 1) n

(* Align on a byte boundary, writing zeroes. *)
let align_write b =
  if b.wbit > 0 then
    for x = 1 to 8 - b.wbit do
      putbit b 0
    done

(* Get the output out. *)
let bytestream_of_write_bitstream b =
  align_write b;
  bytestream_of_list (rev b.bytes)

(* Return a list of booleans, representing (in order) the bits *)
let bits_of_write_bitstream b =
  let numbits = length b.bytes * 8 + b.wbit
  and bytestream = bytestream_of_write_bitstream b
  and bits = ref [] in
    let bitstream = bitstream_of_input (input_of_bytestream bytestream) in
      for x = 1 to numbits do
        bits =| getbit bitstream
      done;
      rev !bits

(* Same, but from a list *)
let join_write_bitstreams ss =
  let c = make_write_bitstream () in
    iter
      (putbool c)
      (flatten (map bits_of_write_bitstream ss));
    c

(* Append b to a. Inputs unaltered. *)
let write_bitstream_append a b =
  join_write_bitstreams [a; b]

(* Same, but align at the join. *)
let write_bitstream_append_aligned a b =
  let c = copy_write_bitstream a in
    align_write c;
    write_bitstream_append c b