File: pdfspace.ml

package info (click to toggle)
camlpdf 0.5-1
  • links: PTS, VCS
  • area: non-free
  • in suites: squeeze, wheezy
  • size: 1,516 kB
  • ctags: 2,689
  • sloc: ml: 18,229; ansic: 139; makefile: 139
file content (263 lines) | stat: -rw-r--r-- 10,332 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
(* PDF Colour space parsing *)
open Utility

type point = float * float * float

type iccbased =
 {icc_n : int;
  icc_alternate : colourspace;
  icc_range : float array;
  icc_metadata : Pdf.pdfobject option;
  icc_stream : Pdf.pdfobject}

and colourspace =
  | DeviceGray
  | DeviceRGB
  | DeviceCMYK
  | CalGray of point * point * float (* White, Black, Gamma *)
  | CalRGB of point * point * float array * float array (* White, Black, Gamma, Matrix *)
  | Lab of point * point * float array (* White, Black, Range *)
  | ICCBased of iccbased
  | Indexed of colourspace * (int, int list) Hashtbl.t (* Base colourspace, values *)
  | Pattern
  | Separation of string * colourspace * Pdffun.pdf_fun
  | DeviceN of string array * colourspace * Pdffun.pdf_fun * Pdf.pdfobject

let rec string_of_colourspace = function
  | DeviceGray -> "/DeviceGray"
  | DeviceRGB -> "/DeviceRGB"
  | DeviceCMYK -> "/DeviceCMYK"
  | CalGray (_, _, _) -> "/CalGray"
  | CalRGB (_, _, _, _) -> "/CalRGB"
  | Lab (_, _, _) -> "/Lab"
  | ICCBased {icc_alternate = a} ->
      "ICC Based - alternate is " ^ string_of_colourspace a
  | Indexed (a, _) ->
      "Indexed - base is " ^ string_of_colourspace a
  | Pattern -> "/Pattern"
  | Separation (_, a, _) ->
      "Separation - base is " ^ string_of_colourspace a
  | DeviceN (_, a, _, _) ->
      "DeviceN - base is " ^ string_of_colourspace a

let name_of_colourspace = function
  | Separation (x, _, _) -> Some x
  | _ -> None

(* Read a tristimulus point. *)
let read_point pdf d n =
  match Pdf.lookup_direct pdf n d with
  | Some (Pdf.Array [a; b; c]) ->
      Pdf.getnum a, Pdf.getnum b, Pdf.getnum c
  | _ ->
      0., 0., 0.

let rec get_basic_table_colourspace c =
  match c with
  | Indexed (alt, _)
  (* FIXME Not actually checked the following two are correct *)
  | DeviceN (_, alt, _, _)
  | Separation (_, alt, _)
  | ICCBased {icc_alternate = alt} -> get_basic_table_colourspace alt
  | x -> x

(* Read a colour space. Raises [Not_found] on error. *)
let rec read_colourspace_inner pdf resources = function
  | Pdf.Indirect i ->
      read_colourspace_inner pdf resources (Pdf.direct pdf (Pdf.Indirect i))
  | Pdf.Name ("/DeviceGray" | "/G") -> DeviceGray
  | Pdf.Name ("/DeviceRGB" | "/RGB") -> DeviceRGB
  | Pdf.Name ("/DeviceCMYK" | "/CMYK") -> DeviceCMYK
  | Pdf.Name "/Pattern" -> Pattern
  | Pdf.Array [Pdf.Name "/Pattern"; base_colspace] -> Pattern (* FIXME *)
  | Pdf.Array [onething] -> read_colourspace_inner pdf resources onething (* [illus_effects.pdf] [[/Pattern]] *)
  | Pdf.Name space ->
      (*i flprint "Looking up space...\n"; i*)
      begin match Pdf.lookup_direct pdf "/ColorSpace" resources with
      | Some csdict ->
          begin match Pdf.lookup_direct pdf space csdict with
          | Some space' ->
              (*i flprint "Found the space... recursing\n"; i*)
              read_colourspace_inner pdf resources space'
          | None -> dpr "X"; raise Not_found
          end
      | None -> dpr "Y"; raise Not_found
      end
  | Pdf.Array [Pdf.Name "/CalGray"; dict] ->
      let whitepoint = read_point pdf dict "/WhitePoint"
      and blackpoint = read_point pdf dict "/BlackPoint"
      and gamma =
        match Pdf.lookup_direct pdf "/Gamma" dict with
        | Some n -> Pdf.getnum n
        | None -> 1.
      in
        CalGray (whitepoint, blackpoint, gamma)
  | Pdf.Array [Pdf.Name "/CalRGB"; dict] ->
      let whitepoint = read_point pdf dict "/WhitePoint"
      and blackpoint = read_point pdf dict "/BlackPoint"
      and gamma =
        match Pdf.lookup_direct pdf "/Gamma" dict with
        | Some (Pdf.Array [a; b; c]) ->
            [|Pdf.getnum a; Pdf.getnum b; Pdf.getnum c|]
        | _ ->
            [|1.; 1.; 1.|]
      and matrix =
        match Pdf.lookup_direct pdf "/Matrix" dict with
        | Some (Pdf.Array [a; b; c; d; e; f; g; h; i]) ->
            [|Pdf.getnum a; Pdf.getnum b; Pdf.getnum c;
              Pdf.getnum d; Pdf.getnum e; Pdf.getnum f;
              Pdf.getnum g; Pdf.getnum h; Pdf.getnum i|]
        | _ ->
            [|1.; 0.; 0.; 0.; 1.; 0.; 0.; 0.; 1.|]
      in
        CalRGB (whitepoint, blackpoint, gamma, matrix)
  | Pdf.Array [Pdf.Name "/Lab"; dict] ->
      let whitepoint = read_point pdf dict "/WhitePoint"
      and blackpoint = read_point pdf dict "/BlackPoint"
      and range =
        match Pdf.lookup_direct pdf "/Range" dict with
        | Some (Pdf.Array [a; b; c; d]) ->
            [|Pdf.getnum a; Pdf.getnum b; Pdf.getnum c; Pdf.getnum d|]
        | _ ->
            [|~-.100.; 100.; ~-.100.; 100.|]
      in
        Lab (whitepoint, blackpoint, range)
  | Pdf.Array [Pdf.Name "/ICCBased"; stream] ->
      begin match Pdf.direct pdf stream with
      | Pdf.Stream {contents = (dict, _)} ->
          let n =
            match Pdf.lookup_direct pdf "/N" dict with
            | Some (Pdf.Integer n) ->
                if n = 1 || n = 3 || n = 4 then n else raise Not_found
            | _ -> raise Not_found
          in
            let alternate =
              match Pdf.lookup_direct pdf "/Alternate" dict with
              | Some cs -> read_colourspace_inner pdf resources cs
              | _ ->
                 match n with
                 | 1 -> DeviceGray
                 | 3 -> DeviceRGB
                 | 4 -> DeviceCMYK
                 | _ -> raise (Assert_failure ("", 0, 0))
            and range =
              match Pdf.lookup_direct pdf "/Range" dict with
              | Some (Pdf.Array elts) when length elts = 2 * n ->
                 Array.of_list (map Pdf.getnum elts)
              | _ ->
                 Array.of_list (flatten (many [0.; 1.] n))
            and metadata =
              Pdf.lookup_direct pdf "/Metadata" dict
            in
              ICCBased
                {icc_n = n;
                 icc_alternate = alternate;
                 icc_range = range;
                 icc_metadata = metadata;
                 icc_stream = stream}
      | _ -> raise Not_found
      end
  | Pdf.Array [Pdf.Name ("/Indexed" | "/I"); bse; hival; lookup_data] ->
      let hival =
        match hival with
        | Pdf.Integer h -> h
        | _ -> raise (Pdf.PDFError "Bad /Hival")
      and bse =
        read_colourspace_inner pdf resources bse
      in
        let mktable_rgb data =
          try
            let table = Hashtbl.create (hival + 1)
            and i = Pdfio.input_of_bytestream data in
              for x = 0 to hival do
                let r = i.Pdfio.input_byte () in
                let g = i.Pdfio.input_byte () in
                let b = i.Pdfio.input_byte () in
                  Hashtbl.add table x [r; g; b]
              done;
              table
          with _ -> raise (Pdf.PDFError "Pdfspace: bad table")
        and mktable_cmyk data =
          try
            let table = Hashtbl.create (hival + 1)
            and i = Pdfio.input_of_bytestream data in
              for x = 0 to hival do
                let c = i.Pdfio.input_byte () in
                let m = i.Pdfio.input_byte () in
                let y = i.Pdfio.input_byte () in
                let k = i.Pdfio.input_byte () in
                  Hashtbl.add table x [c; m; y; k]
              done;
              table
          with _ -> raise (Pdf.PDFError "Pdfspace: bad table")
        in
          let table =
            begin match Pdf.direct pdf lookup_data with
            | (Pdf.Stream _) as stream ->
                Pdfcodec.decode_pdfstream pdf stream;
                begin match stream with
                | (Pdf.Stream {contents = (_, Pdf.Got data)}) ->
                    begin match get_basic_table_colourspace bse with
                    | DeviceCMYK -> mktable_cmyk data
                    | DeviceRGB | CalRGB _ | Lab _ -> mktable_rgb data
                    (* FIXME: We need to read all colourspaces here, except
                    Pattern and Index, which aren't allowed *)
                    | _ -> raise (Pdf.PDFError "Unsupported base colourspace in index colourspace")
                    end
                | _ -> raise (Pdf.PDFError "Indexed/Inconsistent")
                end
            | Pdf.String s ->
                let data = mkstream (String.length s) in
                  for x = 0 to stream_size data - 1 do
                    sset data x (int_of_char s.[x])
                  done;
                  begin match get_basic_table_colourspace bse with
                  | DeviceRGB | CalRGB _ | Lab _ -> mktable_rgb data
                    (* FIXME: We need to read all colourspaces here, except
                    Pattern and Index, which aren't allowed *)
                  | DeviceCMYK -> mktable_cmyk data
                  | _ -> raise (Pdf.PDFError "PDFSpace: Unknown base colourspace in index colourspace")
                  end
            | _ -> raise (Pdf.PDFError ("PDFSpace: unknown indexed colourspace"))
            end
          in
            Indexed (bse, table)
  | Pdf.Array [Pdf.Name "/Separation"; Pdf.Name name; alternate; tint] ->
      let alt_space =
        read_colourspace_inner pdf resources alternate
      and tint_transform =
        Pdffun.parse_function pdf tint
      in
        Separation (name, alt_space, tint_transform)
  | Pdf.Array [Pdf.Name "/DeviceN"; Pdf.Array names; alternate; tint] ->
      let names =
        Array.of_list (map (function Pdf.Name s -> s | _ -> raise Not_found) names)
      and alternate =
        read_colourspace_inner pdf resources alternate
      and tint =
        Pdffun.parse_function pdf tint
      in
        DeviceN (names, alternate, tint, Pdf.Dictionary [])
  | Pdf.Array [Pdf.Name "/DeviceN"; Pdf.Array names; alternate; tint; attributes] ->
      let names =
        Array.of_list (map (function Pdf.Name s -> s | _ -> raise Not_found) names)
      and alternate =
        read_colourspace_inner pdf resources alternate
      and tint =
        Pdffun.parse_function pdf tint
      in
        DeviceN (names, alternate, tint, attributes)
  | _ -> raise Not_found
       
let read_colourspace pdf resources space =
  try
    read_colourspace_inner pdf resources space
  with
    e ->
      (*i Printf.printf "SPACE:\n";
      Printf.printf "%s\n" (Pdfwrite.string_of_pdf space);
      Printf.printf "RESOURCES:\n";
      Printf.printf "%s\n" (Pdfwrite.string_of_pdf resources);
      flprint "\n"; i*)
      raise e