1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328
|
(** Representing PDF Files in Memory *)
(** {2 PDF Objects} *)
(** Abstract description of stream data which has not yet been read from its
[Pdfio.input]. Values are built with [toget] and inspected with
[input_of_toget], [position_of_toget] and [length_of_toget]. *)
type toget
(** A stream is either in memory, or at a position and of a length in a
[Pdfio.input]. *)
type stream =
| Got of Pdfio.bytes (** The stream data, already held in memory. *)
| ToGet of toget (** The stream data has not been read yet; see [toget]. *)
(** PDF objects. An object is a tree-like structure containing various things.
A PDF file is basically a directed graph of objects. *)
type pdfobject =
| Null
| Boolean of bool
| Integer of int
| Real of float
| String of string
| Name of string
| Array of pdfobject list
| Dictionary of (string * pdfobject) list (** Key / value pairs. The examples in this interface write keys with their leading slash, e.g. ["/Root"]. *)
| Stream of (pdfobject * stream) ref (** A mutable cell pairing an object (presumably the stream's dictionary -- confirm) with the stream data. *)
| Indirect of int (** A reference to another object, by object number. *)
(** {2 The Object map} *)
(** You should not expect to manipulate these types and functions directly. *)
(** This type represents a possibly-parsed, possibly-decrypted,
possibly-read-from-an-object-stream object. *)
type objectdata =
(* Not from an object stream, fully parsed, not necessarily decrypted yet *)
| Parsed of pdfobject
(* Was from an object stream, decrypted already when object stream read *)
| ParsedAlreadyDecrypted of pdfobject
(* Not parsed yet. Needs to be read from an object, which may still be encrypted *)
| ToParse
(* Not parsed yet. Will come from an object stream. The two [int] components
are presumably (stream object number, index in stream).
NOTE(review): the hash table and the function component are not described in
this interface -- check the implementation before relying on them. *)
| ToParseFromObjectStream of (int, int list) Hashtbl.t * int * int * (int -> int list -> (int * (objectdata ref * int)) list)
(** Keys of the object map: object numbers. *)
type pdfobjmap_key = int
(** The object map maps object numbers [pdfobjmap_key] to a reference to the
object data, paired with the generation number. *)
type pdfobjmap = (pdfobjmap_key, objectdata ref * int) Hashtbl.t
(** Make an empty object map. *)
val pdfobjmap_empty : unit -> pdfobjmap
(** [pdfobjmap_find key map] finds an object in the object map, returning the
reference to its object data together with its generation number.
NOTE(review): the behaviour when [key] is absent is not stated in this
interface -- confirm against the implementation. *)
val pdfobjmap_find : pdfobjmap_key -> pdfobjmap -> objectdata ref * int
(** The objects. Again, you won't normally manipulate this directly.
[maxobjnum] is the biggest object number seen yet. [parse] is an optional
function to parse a non-object-stream object given its object number.
[pdfobjects] is the object map itself. [object_stream_ids] is a hash table of
(object number, was-stored-in-object-stream-number) pairs, which is used to
reconstruct stream objects when preserving them upon write. *)
type pdfobjects =
{mutable maxobjnum : int;
mutable parse : (pdfobjmap_key -> pdfobject) option;
mutable pdfobjects : pdfobjmap;
mutable object_stream_ids : (int, int) Hashtbl.t}
(** {2 The PDF document} *)
(** Encryption details which can be retained on a document; see the
[saved_encryption] field of [t].
NOTE(review): the meaning of the individual components of
[from_get_encryption_values], and of [perms], is not described in this
interface -- consult the implementation. *)
type saved_encryption =
{from_get_encryption_values :
Pdfcryptprimitives.encryption * string * string * int32 * string * string option * string option;
encrypt_metadata : bool;
perms : string}
(** The payload of [ToDecrypt] (see [toget_crypt]).
NOTE(review): presumably the parameters needed to decrypt stream data at the
time it is finally read; [obj] and [gen] look like object and generation
numbers. None of this is stated in the interface -- confirm. *)
type deferred_encryption =
{crypt_type : Pdfcryptprimitives.encryption;
file_encryption_key : string option;
obj : int;
gen : int;
key : int array;
keylength : int;
r : int}
(** A PDF document. Major and minor version numbers, object number of root, the
objects, and the trailer dictionary as a [Dictionary] [pdfobject]. The record
also carries a [was_linearized] flag and optional [saved_encryption]
details. All fields are mutable. *)
type t =
{mutable major : int;
mutable minor : int;
mutable root : int;
mutable objects : pdfobjects;
mutable trailerdict : pdfobject;
mutable was_linearized : bool;
mutable saved_encryption : saved_encryption option}
(** [empty ()] is the empty document (PDF 1.0, no objects, no root, empty
trailer dictionary). Note that this is not a well-formed PDF. *)
val empty : unit -> t
(** {2 Exceptions and errors} *)
(** This exception is raised when some malformity in a PDF is found -- quite a
wide range of circumstances, and may be raised from many functions. *)
exception PDFError of string
(** This function, given a [Pdfio.input] and an ancillary string, builds an
error string which includes the source of the [Pdfio.input] (filename, string,
bytes etc.) so we can trace what it was originally built from. *)
val input_pdferror : Pdfio.input -> string -> string
(** {2 Useful utilities} *)
(** Get a stream from disc if it hasn't already been got. The argument is a
[Stream] [pdfobject]; the result is [unit]. *)
val getstream : pdfobject -> unit
(** Return a float from a [Real], an [Integer] or an [Indirect]. *)
val getnum : t -> pdfobject -> float
(** [lookup_obj doc n] looks up object number [n] in a document, parsing it if
required.
@raise Not_found if the object does not exist. *)
val lookup_obj : t -> int -> pdfobject
(** [lookup_fail errtext doc key dict] looks up a key in a PDF dictionary or the
dictionary of a PDF stream. Follows indirect object links.
@raise PDFError with [errtext] if the key is not found. *)
val lookup_fail : string -> (t -> string -> pdfobject -> pdfobject)
(** [lookup_exception exn doc key dict] is the same as [lookup_fail], but
with a customised exception in place of [PDFError]. *)
val lookup_exception : exn -> t -> string -> pdfobject -> pdfobject
(** [lookup_direct doc key dict] looks up the key, resolving indirections at
source and destination, returning an option type. *)
val lookup_direct : t -> string -> pdfobject -> pdfobject option
(** [lookup_immediate key dict] looks up the key returning the value, without
following indirects at either source or destination. *)
val lookup_immediate : string -> pdfobject -> pdfobject option
(** [lookup_chain doc start keys] follows the list of keys through nested
dictionaries, beginning at [start]. For example [lookup_chain pdf
pdf.Pdf.trailerdict ["/Root"; "/StructTreeRoot"; "/RoleMap"]] *)
val lookup_chain : t -> pdfobject -> string list -> pdfobject option
(** [replace_chain doc chain obj] sets the object at the given chain from the
trailer dictionary to the given object. If the final part of the chain does
not exist, it is created as direct, nested, dictionaries. *)
val replace_chain : t -> string list -> pdfobject -> unit
(** [remove_chain doc chain] removes the object at the given chain. [true] is
returned if it was removed, [false] otherwise. Only for chains entirely
composed of dictionaries for now. *)
val remove_chain : t -> string list -> bool
(** Return the object number of an indirect dictionary object, if it is
indirect. NOTE(review): the [string] argument is presumably the key looked up
in the given dictionary -- confirm against the implementation. *)
val indirect_number : t -> string -> pdfobject -> int option
(** Same as [lookup_direct], but allow a second, alternative key. The two
[string] arguments are the two keys; the [pdfobject] is the dictionary. *)
val lookup_direct_orelse :
t -> string -> string -> pdfobject -> pdfobject option
(** [remove_dict_entry dict key] removes a dictionary entry, if it exists,
returning the resulting object. *)
val remove_dict_entry : pdfobject -> string -> pdfobject
(** [replace_dict_entry dict key value] replaces a dictionary entry.
@raise Not_found if the entry is not there. *)
val replace_dict_entry : pdfobject -> string -> pdfobject -> pdfobject
(** [add_dict_entry dict key value] adds a dictionary entry, replacing it if
already there. *)
val add_dict_entry : pdfobject -> string -> pdfobject -> pdfobject
(** Make a PDF object direct -- that is, follow any indirect links. *)
val direct : t -> pdfobject -> pdfobject
(** Return the size of the object map. *)
val objcard : t -> int
(** [removeobj doc n] removes the object with the given number. *)
val removeobj : t -> int -> unit
(** Add an object. Returns the object number chosen. *)
val addobj : t -> pdfobject -> int
(** Same as [addobj], but the caller picks the object number, supplied as the
first component of the pair. *)
val addobj_given_num : t -> (int * pdfobject) -> unit
(** {2 Compound structures} *)
(** Parse a PDF rectangle structure into (min x, min y, max x, max y). *)
val parse_rectangle : t -> pdfobject -> float * float * float * float
(** Calling [parse_matrix pdf name dict] parses a PDF matrix found under
key [name] in dictionary [dict] into a [Pdftransform.transform_matrix]. If
there is no matrix, the identity matrix is returned. *)
val parse_matrix : t -> string -> pdfobject -> Pdftransform.transform_matrix
(** Build a matrix [pdfobject] from a [Pdftransform.transform_matrix]. *)
val make_matrix : Pdftransform.transform_matrix -> pdfobject
(** Make a number of PDF documents share no object numbers with one another.
They can then be merged etc. without clashes. *)
val renumber_pdfs : t list -> t list
(** Given a prefix (e.g. gs) and a dictionary, return a name, starting with the
prefix, which is not already in the dictionary (e.g. /gs0). *)
val unique_key : string -> pdfobject -> string
(** {2 Iteration} *)
(** Iterate over the objects in a document. The iterating function receives
both the object number and the object from the object map. *)
val objiter : (int -> pdfobject -> unit) -> t -> unit
(** The same, but in object number order. *)
val objiter_inorder : (int -> pdfobject -> unit) -> t -> unit
(** Iterate over the objects in a document. The iterating function receives
the object number, the generation number and the object from the object
map. *)
val objiter_gen : (int -> int -> pdfobject -> unit) -> t -> unit
(** Map over all PDF objects in a document. Does not include the trailer
dictionary. The result is [unit]: the mapping takes effect on the document
itself. *)
val objselfmap : (pdfobject -> pdfobject) -> t -> unit
(** Iterate over just the stream objects in a document. *)
val iter_stream : (pdfobject -> unit) -> t -> unit
(** Select objects matching a predicate, and return their object numbers. *)
val objselect : (pdfobject -> bool) -> t -> int list
(** {2 Garbage collection} *)
(** Garbage-collect a PDF document. The result is [unit]: the document is
modified rather than copied. NOTE(review): presumably this removes objects
which are not referenced, as the name suggests -- the exact reachability
roots are not stated in this interface. *)
val remove_unreferenced : t -> unit
(** {2 Miscellaneous} *)
(** These functions were previously undocumented. They are documented here for
now, and in the future will be categorised more sensibly. *)
(** [true] if a character is PDF whitespace. *)
val is_whitespace : char -> bool
(** [true] if a character is not PDF whitespace. *)
val is_not_whitespace : char -> bool
(** [true] if a character is a PDF delimiter. *)
val is_delimiter : char -> bool
(** List, in order, the object numbers of the pages in a PDF's page tree. *)
val page_reference_numbers : t -> int list
(** List the object numbers in a PDF. NOTE(review): the order of the result
is not stated in this interface. *)
val objnumbers : t -> int list
(** Use the given function on each element of a PDF dictionary, given as its
list of key / value pairs, returning a [pdfobject].
NOTE(review): the effect and default of [?preserve_order] are not stated in
this interface -- confirm against the implementation. *)
val recurse_dict :
?preserve_order:bool -> (pdfobject -> pdfobject) -> (string * pdfobject) list -> pdfobject
(** Similarly for an [Array], given as its list of elements. The function is
applied to each element. *)
val recurse_array :
(pdfobject -> pdfobject) -> pdfobject list -> pdfobject
(** Calculate the changes required to renumber a PDF's objects 1..n, as a
hash table from object number to object number (presumably old to new --
confirm). *)
val changes : t -> (int, int) Hashtbl.t
(** Perform the given renumberings on a PDF, returning the resulting
document. *)
val renumber : ?preserve_order : bool -> (int, int) Hashtbl.t -> t -> t
(** Renumber a single, already parsed, object given a change table. *)
val renumber_object_parsed : preserve_order:bool -> t -> (int, int) Hashtbl.t -> pdfobject -> pdfobject
(** Fetch a stream, if necessary, and return its contents (with no
processing). *)
val bigarray_of_stream : pdfobject -> Pdfio.bytes
(** Make a [pdfobjects] entry from an optional parser and a list of (object
number, (object data reference, generation number)) pairs. *)
val objects_of_list :
(int -> pdfobject) option -> (int * (objectdata ref * int)) list -> pdfobjects
(** Calling [objects_referenced no_follow_entries no_follow_contains pdf
pdfobject] finds the objects reachable from the given object, returning
their object numbers. Dictionary keys in [no_follow_entries] are not
explored. Dictionaries containing entries in [no_follow_contains] are not
explored. *)
val objects_referenced : string list -> (string * pdfobject) list -> t -> pdfobject -> int list
(** Generate an ID for a PDF document given its prospective file name (and
using the current date and time). If the file name is blank, the ID is
still likely to be unique, being based on date and time only. If
environment variable CAMLPDF_REPRODUCIBLE_IDS=true is set, the ID will instead
be set to a standard value.
NOTE(review): the [unit -> float] argument is presumably the source of the
current time -- confirm against the implementation. *)
val generate_id : t -> string -> (unit -> float) -> pdfobject
(** Return the document catalog. *)
val catalog_of_pdf : t -> pdfobject
(** [find_indirect key dict] finds the indirect reference given by the value
associated with a key in a dictionary, if there is one. *)
val find_indirect : string -> pdfobject -> int option
(** Calling [nametree_lookup pdf k dict] looks up the name [k] in the name
tree [dict]. *)
val nametree_lookup : t -> pdfobject -> pdfobject -> pdfobject option
(** Return an ordered list of the key-value pairs in a given name tree. *)
val contents_of_nametree : t -> pdfobject -> (pdfobject * pdfobject) list
(** Copy a PDF data structure so that nothing is shared with the original. *)
val deep_copy : t -> t
(** Change the /ID string in a PDF's trailer dictionary. *)
val change_id : t -> string -> unit
(**/**)
(* This is only for the use of Pdfread for when the /Length is incorrect.
[toget_crypt] says whether the data of a [toget] is to be left as it is
([NoChange]) or carries deferred decryption parameters ([ToDecrypt]).
[length_of_toget], [input_of_toget] and [position_of_toget] read back the
components of a [toget]; [toget] builds one.
NOTE(review): the two [int] arguments of [toget] are presumably position then
length, and the default for [?crypt] is not stated here -- confirm against
the implementation. *)
type toget_crypt =
| NoChange
| ToDecrypt of deferred_encryption
val length_of_toget : toget -> int
val input_of_toget : toget -> Pdfio.input
val position_of_toget : toget -> int
val toget : ?crypt:toget_crypt -> Pdfio.input -> int -> int -> toget
(* For inter-module recursion within CamlPDF, hence undocumented.
[string_of_pdf] is a mutable reference to a function from [pdfobject] to
[string], so that another module can install the implementation.
NOTE(review): [transform_rect] and [transform_quadpoints] presumably apply
the given matrix to a rectangle / quadpoints object and return the
transformed object -- not stated here, confirm against the implementation. *)
val string_of_pdf : (pdfobject -> string) ref
val transform_rect : t -> Pdftransform.transform_matrix -> pdfobject -> pdfobject
val transform_quadpoints : t -> Pdftransform.transform_matrix -> pdfobject -> pdfobject
|