File: pdfdraft.ml

package info (click to toggle)
camlpdf 2.9-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 3,272 kB
  • sloc: ml: 20,816; ansic: 9,525; makefile: 100; sh: 23
file content (97 lines) | stat: -rw-r--r-- 3,364 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
(* Make a PDF suitable for draft printing by replacing its images by crossed
boxes. Usage: pdfdraft input.pdf output.pdf *)
open Pdfutil

(* Predicate on an xobject: true if an image xobject. *)
let isimage pdf (_, xobj) =
  Pdf.lookup_direct pdf "/Subtype" xobj = Some (Pdf.Name "/Image")

(* Given a set of resources for a page, and the name of a resource, determine if
that name refers to an image xobject. *)
let xobject_isimage pdf resources name =
  match resources with
  | Pdf.Dictionary _ ->
      begin match Pdf.lookup_direct pdf "/XObject" resources with
      | Some xobjects ->
          isimage pdf ("", Pdf.lookup_fail "xobject not there" pdf name xobjects)
      | _ -> false
      end
  | _ -> failwith "bad resources"

(* Remove any image xobjects from a set of resources. *)
let remove_image_xobjects pdf resources =
  match resources with
  | Pdf.Dictionary res ->
      begin match Pdf.lookup_direct pdf "/XObject" resources with
      | Some (Pdf.Dictionary xobjects) ->
          Pdf.Dictionary
            (replace "/XObject" (Pdf.Dictionary (lose (isimage pdf) xobjects)) res)
      | _ -> resources
      end
  | _ -> failwith "bad resources"

(* The subsitute for an image. *)
let substitute =
  rev
    [Pdfops.Op_q;
     Pdfops.Op_w 0.;
     Pdfops.Op_G 0.;
     Pdfops.Op_re (0., 0., 1., 1.);
     Pdfops.Op_m (0., 0.);
     Pdfops.Op_l (1., 1.);
     Pdfops.Op_m (0., 1.);
     Pdfops.Op_l (1., 0.);
     Pdfops.Op_S;
     Pdfops.Op_Q]

(* Remove references to images from a graphics stream. *)
let rec remove_images_stream pdf resources prev = function
  | [] -> rev prev
  | (Pdfops.Op_Do name) as h::t ->
      if xobject_isimage pdf resources name
        then remove_images_stream pdf resources (substitute @ prev) t
        else remove_images_stream pdf resources (h::prev) t
  | Pdfops.InlineImage _::t ->
      remove_images_stream pdf resources (substitute @ prev) t
  | h::t ->
      remove_images_stream pdf resources (h::prev) t

(* Remove images from a page. *)
let remove_images_page pdf page =
  let content' =
    remove_images_stream pdf page.Pdfpage.resources []
       (Pdfops.parse_operators pdf page.Pdfpage.resources page.Pdfpage.content)
  in
    {page with
      Pdfpage.content =
        (let stream = Pdfops.stream_of_ops content' in
          Pdfcodec.encode_pdfstream pdf Pdfcodec.Flate stream;
          [stream]);
      Pdfpage.resources =
        remove_image_xobjects pdf page.Pdfpage.resources}

(* Remove images from all pages in a document. *)
let remove_images pdf =
  let pages = Pdfpage.pages_of_pagetree pdf in
    let pages' = map (remove_images_page pdf) pages in
      let pdf, pagetree_num = Pdfpage.add_pagetree pages' pdf in
        let pdf = Pdfpage.add_root pagetree_num [] pdf in
          Pdf.remove_unreferenced pdf;
          pdf

(* Read command line arguments and call [remove_images] *)
let _ =
  match Array.to_list Sys.argv with
  | [_; in_file; out_file] ->
      begin try
        let ch = open_in_bin in_file in
          let pdf = Pdfread.pdf_of_channel None None ch in
            Pdfwrite.pdf_to_file (remove_images pdf) out_file;
            close_in ch
      with
        err ->
          Printf.printf "Failed to produce output.\n%s\n\n" (Printexc.to_string err);
          exit 1
      end
  | _ ->
      print_string "Syntax: pdfdraft <input> <output>\n\n"; exit 1