File: pdfmarks.ml

package info (click to toggle)
camlpdf 0.5-1
  • links: PTS, VCS
  • area: non-free
  • in suites: squeeze, wheezy
  • size: 1,516 kB
  • ctags: 2,689
  • sloc: ml: 18,229; ansic: 139; makefile: 139
file content (300 lines) | stat: -rw-r--r-- 10,474 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
(* PDF Bookmarks *)
open Utility

type target = int (* Just page number for now *)

type bookmark =
  {level : int;
   text : string;
   target : target;
   isopen : bool}

let pagenumber_of_target (t : target) = (t : int)

let target_of_pagenumber (t : int) = (t : target)

let remove_bookmarks pdf =
  match Pdf.lookup_direct pdf "/Root" pdf.Pdf.trailerdict with
  | None -> raise (Pdf.PDFError "remove_boomarks: Bad PDF: no root")
  | Some catalog ->
      let catalog' = Pdf.remove_dict_entry catalog "/Outlines" in
        let newcatalognum = Pdf.addobj pdf catalog' in
          {pdf with
            Pdf.root = newcatalognum;
            Pdf.trailerdict =
              Pdf.add_dict_entry
                pdf.Pdf.trailerdict "/Root" (Pdf.Indirect newcatalognum)}

type ntree =
  Br of int * Pdf.pdfobject * ntree list

let fresh source pdf =
  incr source; Pdf.maxobjnum pdf + !source

(* Flatten a tree and produce a root object for it. Return a list of
(num, pdfobject) pairs with the root first. *)
let flatten_tree source pdf = function
  | [] ->
      let n = fresh source pdf in
        [(n, Pdf.Dictionary [])], n
  | tree ->
      let root_objnum = fresh source pdf in
      (* Add /Parent links to root *)
      let tree =
        let add_root_parent (Br (i, dict, children)) =
          Br
            (i,
             Pdf.add_dict_entry dict "/Parent" (Pdf.Indirect root_objnum),
             children)
        in
          map add_root_parent tree
      in
        let rec really_flatten = function
          Br (i, pdfobject, children) ->
            (i, pdfobject) :: flatten (map really_flatten children)
        in
          let all_but_top = flatten (map really_flatten tree)
          and top, topnum =
            (* Make top level from objects at first level of tree *)
            match extremes tree with
              Br (first, _, _), Br (last, _, _) ->
                 (root_objnum, Pdf.Dictionary
                   [("/First", Pdf.Indirect first); ("/Last", Pdf.Indirect last)]),
                 root_objnum
          in
            top::all_but_top, topnum

(* Add /Count entries to an ntree *)
let add_counts tree = tree

(* Add /Parent entries to an ntree *)
let rec add_parent parent (Br (i, obj, children)) =
  let obj' =
    match parent with
    | None -> obj
    | Some parent_num ->
        Pdf.add_dict_entry obj "/Parent" (Pdf.Indirect parent_num)
  in
    Br (i, obj', map (add_parent (Some i)) children)

(* Add /First and /Last entries to an ntree *)
let rec add_firstlast (Br (i, obj, children)) =
  match children with
  | [] -> (Br (i, obj, children))
  | c ->
      match extremes c with
        Br (i', _, _), Br (i'', _, _) ->
          let obj = Pdf.add_dict_entry obj "/First" (Pdf.Indirect i') in
            let obj = Pdf.add_dict_entry obj "/Last" (Pdf.Indirect i'') in
              (Br (i, obj, map add_firstlast children))
       
(* Add /Next and /Prev entries to an ntree *)
let rec add_next (Br (i, obj, children)) =
  match children with
  | [] -> Br (i, obj, children)
  | [_] -> Br (i, obj, map add_next children)
  | c::cs ->
      let numbers = map (fun (Br (i, _, _)) -> i) cs in
        let children' =
          (map2
             (fun (Br (i, obj, children)) nextnum ->
                Br (i,
                    Pdf.add_dict_entry obj "/Next" (Pdf.Indirect nextnum),
                    children))
             (all_but_last (c::cs))
             numbers)
          @ [last cs]
        in
          Br (i, obj, map add_next children')

let rec add_prev (Br (i, obj, children)) =
  match children with
  | [] -> Br (i, obj, children)
  | [_] -> Br (i, obj, map add_prev children)
  | c::cs ->
      let numbers = map (fun (Br (i, _, _)) -> i) (all_but_last (c::cs)) in
        let children' =
          c::
            map2
              (fun (Br (i, obj, children)) prevnum ->
                 Br (i,
                     Pdf.add_dict_entry obj "/Prev" (Pdf.Indirect prevnum),
                     children))
              cs
              numbers
        in
          Br (i, obj, map add_prev children')

(* Find a page indirect from the page tree of a document, given a page number. *)
let page_object_number pdf destpage =
  try
    Pdf.Indirect (select destpage (Pdf.page_reference_numbers pdf))
  with
    (* The page might not exist in the output *)
    Invalid_argument "select" -> dpr "3b"; Pdf.Null

(* Make a node from a given title, destination page number in a given PDF ond
open flag. *)
let node_of_line pdf title destpage isopen =
  if destpage > 0 then (* destpage = 0 means no destination. *)
    Pdf.Dictionary
      [("/Title", Pdf.String title);
       ("/Dest", Pdf.Array
         [page_object_number pdf destpage; Pdf.Name "/Fit"])]
  else
    Pdf.Dictionary [("/Title", Pdf.String title)]

(* Make an ntree list from a list of parsed bookmark lines. *)
let rec make_outline_ntree source pdf = function
  | [] -> []
  | h::t ->
      let lower, rest = cleavewhile (fun {level = n'} -> n' > h.level) t in
        let node = node_of_line pdf h.text h.target h.isopen in
          Br (fresh source pdf, node, make_outline_ntree source pdf lower)
            ::make_outline_ntree source pdf rest

(* Add bookmarks. *)
let add_bookmarks parsed pdf =
  if parsed = [] then remove_bookmarks pdf else
  begin
    let source = ref 0 in
    let tree = make_outline_ntree source pdf parsed in
      (* Build the (object number, bookmark tree object) pairs. *)
      let pairs, tree_root_num =
        let tree =
          map add_firstlast tree
        in
          let tree =
            match add_next (add_prev (Br (0, Pdf.Null, tree))) with
              Br (_, _, children) -> children
          in
            flatten_tree source pdf (add_counts (map (add_parent None) tree))
      in
        (* Add the objects to the pdf *)
        iter
          (function x -> ignore (Pdf.addobj_given_num pdf x))
          pairs;
          (* Replace the /Outlines entry in the document catalog. *)
          match Pdf.lookup_direct pdf "/Root" pdf.Pdf.trailerdict with
          | None -> raise (Pdf.PDFError "Bad PDF: no root")
          | Some catalog ->
              let catalog' =
                Pdf.add_dict_entry catalog "/Outlines" (Pdf.Indirect tree_root_num)
              in
                let newcatalognum = Pdf.addobj pdf catalog' in
                  {pdf with
                    Pdf.root = newcatalognum;
                    Pdf.trailerdict =
                      Pdf.add_dict_entry
                        pdf.Pdf.trailerdict "/Root" (Pdf.Indirect newcatalognum)}
  end

let error s = raise (Pdf.PDFError s)

let rec destpage_of_dest pdf = function
  | (Pdf.Array (
        [Pdf.Indirect n; _]
      | [Pdf.Indirect n; _; _]
      | [Pdf.Indirect n; _; _; _]
      | [Pdf.Indirect n; _; _; _; _]
      | [Pdf.Indirect n; _; _; _; _; _])) -> Some n
  | Pdf.Dictionary d ->
      begin match Pdf.lookup_direct pdf "/D" (Pdf.Dictionary d) with
      | Some dest -> destpage_of_dest pdf dest
      | None -> None
      end
  | Pdf.String s ->
      (* PDF 1.2. String object *)
      let rootdict =
        Pdf.lookup_obj pdf pdf.Pdf.root
      in
        begin match Pdf.lookup_direct pdf "/Names" rootdict with
        | Some namedict ->
            begin match Pdf.lookup_direct pdf "/Dests" namedict with
            | Some destsdict ->
                begin match Pdf.nametree_lookup pdf (Pdf.String s) destsdict with
                | None -> None
                | Some dest -> destpage_of_dest pdf (Pdf.direct pdf dest)
                end
            | _ -> error "No /Dests dictionary"
            end
        | _ -> error "No name dictionary"
        end
  | Pdf.Name n ->
      (* PDF 1.1. Name object *)
      begin match Pdf.lookup_direct pdf "/Root" pdf.Pdf.trailerdict with
      | Some catalog ->
          begin match Pdf.lookup_direct pdf "/Dests" catalog with
          | Some dests ->
              begin match
                Pdf.lookup_direct pdf n dests
              with
              | Some dest' -> destpage_of_dest pdf dest'
              | None -> None
              end
          | None -> None
          end
      | None -> error "No Document Catalog"
      end
  | _ -> None

let rec traverse_outlines_lb indent_lb pdf outlines output =
  match Pdf.lookup_direct pdf "/First" outlines with
  | None -> ()
  | Some first -> do_until_no_next_lb indent_lb pdf first output

and do_until_no_next_lb indent_lb pdf outline output =
  let page_indirects =
    combine (Pdf.page_reference_numbers pdf) (ilist 1 (length (Pdfdoc.pages_of_pagetree pdf)))
  in
    let pagenumber p =
      match lookup p page_indirects with
      | Some p -> p
      | None -> 0
    in
      begin match Pdf.lookup_direct pdf "/Title" outline with
      | Some (Pdf.String s) ->
          let page =
            match Pdf.lookup_direct pdf "/Dest" outline with
            | Some dest ->
                begin match destpage_of_dest pdf dest with
                | None -> 0
                | Some p -> pagenumber p
                end
            | None ->
                match Pdf.lookup_direct pdf "/A" outline with
                | None -> 0
                | Some action ->
                    match Pdf.lookup_direct pdf "/D" action with
                    | None -> 0
                    | Some dest ->
                        match destpage_of_dest pdf dest with
                        | None -> 0
                        | Some p -> pagenumber p
          and opn =
            match Pdf.lookup_direct pdf "/Count" outline with
            | Some (Pdf.Integer i) when i > 0 -> true
            | _ -> false
          in
            output {level = !indent_lb; text = s; target = page; isopen = opn}
        | _ -> ()
        end;
        incr indent_lb;
        traverse_outlines_lb indent_lb pdf outline output;
        if !indent_lb > 0 then decr indent_lb;
        begin match Pdf.lookup_direct pdf "/Next" outline with
        | None -> ()
        | Some outline -> do_until_no_next_lb indent_lb pdf outline output
        end

let read_bookmarks pdf =
  match Pdf.lookup_direct pdf "/Root" pdf.Pdf.trailerdict with
  | None -> error "Bad PDF: no root"
  | Some catalog ->
      match Pdf.lookup_direct pdf "/Outlines" catalog with
      | None -> []
      | Some outlines ->
          let out = ref [] in
            let output = (function b -> out := b::!out) in
              traverse_outlines_lb (ref 0) pdf outlines output;
              rev !out