1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97
|
(* Make a PDF suitable for draft printing by replacing its images by crossed
boxes. Usage: pdfdraft input.pdf output.pdf *)
open Pdfutil
(* Predicate on an xobject: true if an image xobject. *)
let isimage pdf (_, xobj) =
Pdf.lookup_direct pdf "/Subtype" xobj = Some (Pdf.Name "/Image")
(* Given a set of resources for a page, and the name of a resource, determine if
that name refers to an image xobject. *)
let xobject_isimage pdf resources name =
match resources with
| Pdf.Dictionary _ ->
begin match Pdf.lookup_direct pdf "/XObject" resources with
| Some xobjects ->
isimage pdf ("", Pdf.lookup_fail "xobject not there" pdf name xobjects)
| _ -> false
end
| _ -> failwith "bad resources"
(* Remove any image xobjects from a set of resources. *)
let remove_image_xobjects pdf resources =
match resources with
| Pdf.Dictionary res ->
begin match Pdf.lookup_direct pdf "/XObject" resources with
| Some (Pdf.Dictionary xobjects) ->
Pdf.Dictionary
(replace "/XObject" (Pdf.Dictionary (lose (isimage pdf) xobjects)) res)
| _ -> resources
end
| _ -> failwith "bad resources"
(* The subsitute for an image. *)
let substitute =
rev
[Pdfops.Op_q;
Pdfops.Op_w 0.;
Pdfops.Op_G 0.;
Pdfops.Op_re (0., 0., 1., 1.);
Pdfops.Op_m (0., 0.);
Pdfops.Op_l (1., 1.);
Pdfops.Op_m (0., 1.);
Pdfops.Op_l (1., 0.);
Pdfops.Op_S;
Pdfops.Op_Q]
(* Remove references to images from a graphics stream. *)
let rec remove_images_stream pdf resources prev = function
| [] -> rev prev
| (Pdfops.Op_Do name) as h::t ->
if xobject_isimage pdf resources name
then remove_images_stream pdf resources (substitute @ prev) t
else remove_images_stream pdf resources (h::prev) t
| Pdfops.InlineImage _::t ->
remove_images_stream pdf resources (substitute @ prev) t
| h::t ->
remove_images_stream pdf resources (h::prev) t
(* Remove images from a page. *)
let remove_images_page pdf page =
let content' =
remove_images_stream pdf page.Pdfpage.resources []
(Pdfops.parse_operators pdf page.Pdfpage.resources page.Pdfpage.content)
in
{page with
Pdfpage.content =
(let stream = Pdfops.stream_of_ops content' in
Pdfcodec.encode_pdfstream pdf Pdfcodec.Flate stream;
[stream]);
Pdfpage.resources =
remove_image_xobjects pdf page.Pdfpage.resources}
(* Remove images from all pages in a document. *)
let remove_images pdf =
let pages = Pdfpage.pages_of_pagetree pdf in
let pages' = map (remove_images_page pdf) pages in
let pdf, pagetree_num = Pdfpage.add_pagetree pages' pdf in
let pdf = Pdfpage.add_root pagetree_num [] pdf in
Pdf.remove_unreferenced pdf;
pdf
(* Read command line arguments and call [remove_images] *)
let _ =
match Array.to_list Sys.argv with
| [_; in_file; out_file] ->
begin try
let ch = open_in_bin in_file in
let pdf = Pdfread.pdf_of_channel None None ch in
Pdfwrite.pdf_to_file (remove_images pdf) out_file;
close_in ch
with
err ->
Printf.printf "Failed to produce output.\n%s\n\n" (Printexc.to_string err);
exit 1
end
| _ ->
print_string "Syntax: pdfdraft <input> <output>\n\n"; exit 1
|