1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344
|
% Copyright (C) 1994 Aladdin Enterprises. All rights reserved.
% pdf_base.ps
% Basic parser for PDF reader.
% This handles basic parsing of the file (including the trailer
% and cross-reference table), as well as objects, object references,
% and streams; it doesn't include any facilities for making marks on
% the page.
% If the interpreter defines .setlanguagelevel, switch to language level 2.
/.setlanguagelevel where { pop 2 .setlanguagelevel } if
% Leave the previous .currentglobal value on the operand stack and turn the
% global flag on; the saved value is consumed by the .setglobal that follows
% 'end' at the bottom of this file.
.currentglobal true .setglobal
% Create pdfdict (initial size 100) unless it is already defined, then make
% it the current dictionary for the definitions that follow.
/pdfdict where { pop } { /pdfdict 100 dict def } ifelse
pdfdict begin
% Default PDF operator dispatcher: the operand count <N> is discarded and
% <opname> is made executable and run.  Both # and #dsc are rebound later
% if we're writing out PostScript.
%   <arg1> ... <argN> <opname> <N> # -
/# { pop cvx exec } bind def
% Default #dsc: discard everything down to and including the nearest mark.
%   mark <obj1> ... #dsc -
/#dsc { cleartomark } bind def
% valueopdict: the name interpretation dictionary used while reading values.
% It maps each executable name that may appear in a PDF value to the object
% that is executed when the name is encountered.
/valueopdict mark
    % Constants.
  /null null
  /true true
  /false false
    % Array and dictionary brackets.  The opening brackets are procedures
    % that execute 'mark'; an actual mark must not be pushed here because
    % this table is itself being built between a mark and .dicttomark.
  /[ { mark } bind
  /] /] load
  (<<) cvn { mark } bind
  (>>) cvn /.dicttomark load
    % Executable names, looked up when run: F and R are defined in the
    % Objects section below, stream in the Streams section below.
  /F /F cvx
  /R /R cvx
  /stream /stream cvx
.dicttomark readonly def
% ------ Utilities ------ %
% Define a scratch string. The PDF language definition says that
% no line in a PDF file can exceed 255 characters.
% prevline (below) uses this string as its readline buffer, so the
% substring it returns is only valid until the buffer is reused.
/pdfstring 255 string def
% prevline: fetch the line that ends at, or contains, the current position
% of PDFfile.
%   - prevline <startpos> <substring>
% <startpos> is the file offset at which that line begins and <substring>
% is its text, held in the shared pdfstring buffer.  On return PDFfile is
% positioned just past the line that was read.
/prevline
{ PDFfile fileposition pdfstring        % Stack: initpos buffer
  % Back up 257 bytes (but not before the start of the file), then read
  % forward a line at a time until the initial position is reached or passed.
  PDFfile 2 index 257 sub 0 .max setfileposition
  { PDFfile fileposition                % Stack: initpos buffer startpos
    PDFfile 2 index readline pop        % Stack: initpos buffer startpos substring
    PDFfile fileposition 4 index ge { exit } if
    pop pop                             % not there yet: drop startpos and substring
  } loop
  4 -2 roll pop pop                     % Stack: startpos substring
} bind def
% Execute a file, interpreting its executable names in a given
% dictionary. The name procedures may do whatever they want
% to the operand stack.
% For the duration of the run PDFsource is set to <file>.  The previous
% value of PDFsource is captured when the run procedure is built and is
% stored back after the token loop ends; if the loop ended through 'stop',
% the stop is re-raised once PDFsource has been put back.
/pdfrun % <file> <opdict> pdfrun -
{ % Construct a procedure with the file and opdict bound into it.
% The file is made literal so that the constructed procedure pushes it
% rather than executing it.
1 index cvlit mark mark 4 2 roll
% Stack: file mark mark opdict file
% The elements of the template below are appended after opdict and file to
% form the loop body: read one token (at end of file the executable name
% made from the string (%%EOF) is substituted), then dispatch on it.
{ token not { (%%EOF) cvn cvx } if
dup xcheck
{ DEBUG { dup == flush } if
2 copy .knownget
% Executable token found in opdict: drop opdict and the token, run the value.
{ exch pop exch pop exec }
% Executable token not in opdict: report it on .stderr, then drop opdict
% and carry on with the next token.
{ .stderr dup (****************Unknown operator: ) writestring
exch .writecvs .stderr dup (\n) writestring flushfile
pop
}
ifelse
}
% Non-executable token: drop opdict and leave the token on the operand stack.
{ exch pop DEBUG { dup ==only ( ) print flush } if
}
ifelse
}
aload pop .packtomark cvx
% Stack: file mark {loop body}
/loop cvx 2 packedarray cvx
% Stack: file mark {{loop body} loop}
% Wrap the loop in 'stopped' and append the code that puts PDFsource back.
{ stopped /PDFsource } aload pop
PDFsource
{ store { stop } if } aload pop .packtomark cvx
% Stack: file {run procedure}
% Make <file> the current PDFsource and start the run.
/PDFsource 3 -1 roll store exec
} bind def
% ------ File reading ------ %
% Read the cross-reference entry for an (unresolved) object, record the
% entry's generation number in Generations, and return the object position
% given by the entry.  PDFfile is repositioned by this procedure; the caller
% must save and restore the PDFfile position if desired.
/readxrefentry          % <object#> readxrefentry <objpos>
{ % For an unresolved object, Objects[object#] is the offset of its entry.
  PDFfile Objects 2 index get setfileposition
  PDFfile token pop                     % Stack: obj# objpos
  PDFfile token pop                     % Stack: obj# objpos gen#
  PDFfile token pop /n ne               % the entry type must be n
  { /readxrefentry cvx /syntaxerror signalerror } if
  % When the generation number exceeds 255 and Generations is still a
  % string, replace Generations by an array with the same contents.
  dup 255 gt
  { Generations type /stringtype eq
    { Generations length array          % Stack: obj# objpos gen# newarray
      0 1 Generations length 1 sub
      { % Stack: obj# objpos gen# newarray i
        1 index exch dup Generations exch get put
      } for
      /Generations exch store
    } if
  } if
  % Stack: obj# objpos gen#
  3 -1 roll exch Generations 3 1 roll put
} bind def
% ================================ Objects ================================ %
% An unresolved object reference is represented by a procedure of the form
% {obj# gen# resolveR}.  No PDF object can look like that, because PDF
% cannot express procedures; more generally, PDF cannot express any
% PostScript object that does not evaluate to itself, so a possibly
% indirect object can be 'forced' painlessly with 'exec'.
% Streams are represented by executable dictionaries (see the Streams
% section), so deciding whether an object has been resolved takes both an
% xcheck and a type check: an entry of Objects is unresolved only when it
% is an executable integer.
/unresolved?            % <object#> unresolved? <bool>
{ Objects exch get
  dup type /integertype eq exch xcheck and
} bind def
% oforce 'forces' a possibly indirect object.  It is defined as the exec
% operator itself: per the notes at the start of this section, exec runs an
% unresolved reference {obj# gen# resolveR} and leaves any other PDF object
% as it is.
/oforce /exec load def
% oget: fetch an element, forcing it if necessary.
%   <array> <index> oget <object>
%   <dict> <key> oget <object>
% When the stored element is executable it is executed, and the result is
% written back into the container in its place before being returned.
/oget
{ 2 copy get                    % Stack: container key value
  dup xcheck
  { exec                        % Stack: container key result
    dup 4 1 roll put            % Stack: result
  }
  { 3 1 roll pop pop }          % Stack: value
  ifelse
} bind def
% knownoget: like oget, but reports whether the key has a usable value.
%   <dict> <key> knownoget <object> true
%   <dict> <key> knownoget false
% A null value in a dictionary is equivalent to an omitted key, so a key
% whose (forced) value is null also yields false.
/knownoget
{ 2 copy known not
  { pop pop false }
  { oget
    dup null ne                 % Stack: value bool
    dup not { exch pop } if     % drop a null value, leaving just false
  }
  ifelse
} bind def
% PDF 1.1 defines a 'foreign file reference', but not its meaning.
% Per the specification, such a reference is converted to null.
/F                      % <file#> <object#> <generation#> F <object>
{ % Some PDF 1.1 files use F as a synonym for f!  With fewer than three
  % operands on the stack, the fill sequence (setfillcolor /f fs, defined
  % outside this section) is run instead of consuming a reference.
  count 3 ge
  { pop pop pop null }
  { setfillcolor /f fs }
  ifelse
} bind def
% Objects are tracked in a pair of arrays, Objects and Generations.
% Generations[N] is the current generation number for object number N.
% (As far as we can tell, this is needed only for error checking.)
% If object N is loaded, Objects[N] is the actual object; otherwise
% Objects[N] is an executable integer giving the file offset of the
% object's entry in the cross-reference table.
%
% checkgeneration verifies that <generation#> matches Generations[object#].
% On a mismatch it prints a diagnostic and signals rangecheck (reported
% against R); otherwise it just drops the generation number.
/checkgeneration        % <object#> <generation#> checkgeneration <object#>
{ dup Generations 3 index get ne
  { (Wrong generation: ) print 1 index =only ( ) print dup =
    /R cvx /rangecheck signalerror
  } if
  pop
} bind def
% R: turn an indirect reference into either the object itself or a
% deferred reference.
/R                      % <object#> <generation#> R <object>
{ 1 index unresolved? not
  { % Already loaded: fetch the object, then verify the generation number.
    1 index Objects exch get 3 1 roll checkgeneration pop
  }
  { % Not loaded yet: build the executable packed array {obj# gen# resolveR}.
    /resolveR cvx 3 packedarray cvx
  }
  ifelse
} bind def
% If we encounter an object definition while reading sequentially,
% we just store it away and keep going: objopdict is valueopdict extended
% with endobj (as an executable name), and obj runs PDFfile under it.
/objopdict mark
  valueopdict { } forall
  /endobj /endobj cvx
.dicttomark readonly def
% <object#> <generation#> obj <object>
/obj { PDFfile objopdict pdfrun } bind def
% endobj: record a freshly read object in Objects and return it.
/endobj                 % <object#> <generation#> <object> endobj <object>
{ 3 1 roll                              % Stack: object obj# gen#
  % Read the xref entry if we haven't yet done so.  This is only needed
  % for generation # checking; the PDFfile position is preserved around it.
  1 index unresolved?
  { PDFfile dup fileposition            % Stack: object obj# gen# file savedpos
    3 index readxrefentry pop
    setfileposition
  } if
  checkgeneration                       % Stack: object obj#
  1 index Objects 3 1 roll put          % Stack: object
} bind def
% When resolving an object reference, we stop at the endobj: in this
% dictionary endobj stores the object and then exits the token loop.
/resolveopdict mark
  valueopdict { } forall
  /endobj { endobj exit } bind
.dicttomark readonly def
% resolveR: return the object for a reference, reading it from PDFfile if
% it has not been loaded yet.  The position of PDFfile is saved before the
% read and restored afterwards.
/resolveR % <object#> <generation#> resolveR <object>
{ DEBUG { (Resolving: ) print 2 copy 2 array astore == } if
1 index unresolved?
% Unresolved: save the file position, read the xref entry (which also
% records the generation number), and check the requested generation.
{ PDFfile fileposition 3 1 roll
% Stack: savepos obj# gen#
1 index readxrefentry
% Stack: savepos obj# gen# objpos
3 1 roll checkgeneration
% Stack: savepos objpos obj#
exch PDFfile exch setfileposition
% The first token at objpos must equal the object number.
PDFfile token pop 2 copy ne
{ (xref error!\n) print /resolveR cvx /rangecheck signalerror
}
% The second token is read and kept as the generation number for endobj;
% it is not compared with anything at this point.
if pop PDFfile token pop
% The third token must be the keyword obj.
PDFfile token pop /obj ne
{ (xref error!\n) print /resolveR cvx /rangecheck signalerror
}
if
% Read the object body; the endobj entry of resolveopdict stores the
% object and exits the token loop.
PDFfile resolveopdict pdfrun
% Stack: savepos object
exch PDFfile exch setfileposition
}
% Already resolved: drop the generation number and return Objects[obj#]
% (no generation check is made on this path).
{ pop Objects exch get
}
ifelse
} bind def
% ================================ Streams ================================ %
% We represent a stream by an executable dictionary that contains,
% in addition to the contents of the original stream dictionary:
% /File - the file or string where the stream contents are stored;
% /FilePosition - iff File is a file, the position in the file
% where the contents start.
% We do the real work of constructing the data stream only when the
% contents are needed.
% Construct a stream. The length is not reliable in the face of
% different end-of-line conventions, but it's all we've got.
% Since the stream keyword may be followed by 0, 1, or more blanks,
% we have to back up in the file to find where the data actually starts.
% The stream keyword may be followed by a blank line, which we also
% must skip before reading any data. (This is chancy if the data are
% in binary form, but such files are questionable to begin with.)
% streamskipeols: consume consecutive end-of-line characters (codes 10 and
% 13) from PDFsource, stopping in front of the first other character.
% Reaching end of file first is a syntaxerror reported against stream.
%   - streamskipeols -
/streamskipeols
{ { PDFsource read not { /stream cvx /syntaxerror signalerror } if
    dup 10 ne 1 index 13 ne and         % neither of the two EOL codes?
    { PDFsource exch unread exit } if   % push it back and finish
    pop
  } loop
} bind def
% stream: complete a stream object whose dictionary is on the stack.
%   <dict> stream <streamdict>
% The result is the same dictionary, made executable, with /File (and, when
% reading from PDFfile, /FilePosition) added.
/stream
{ PDFsource PDFfile eq
% Reading directly from PDFfile: record the file and the offset of the
% data in the dictionary, then skip the data by repositioning the file.
{ dup /File PDFfile put
% prevline leaves PDFfile just past the line it read; its two results
% are not needed here.
prevline pop pop
streamskipeols
dup /FilePosition PDFfile fileposition put
% Move past the data, trusting /Length.
PDFfile fileposition 1 index /Length oget add
PDFfile exch setfileposition
}
{ % We're already reading from a stream, which we can't reposition.
% Capture the sub-stream contents in a string.
streamskipeols
dup /Length oget string PDFsource exch readstring
not
{ (Unexpected EOF in stream!\n) print
/stream cvx /rangecheck signalerror
}
if
% Stack: dict string
1 index exch /File exch put
}
ifelse
% The next token must be endstream.
PDFsource token pop
/endstream ne { /stream cvx /syntaxerror signalerror } if
% Streams are represented by executable dictionaries.
cvx
} bind def
% Resolve a stream dictionary to a PostScript stream.
% Streams with no filters require special handling:
%   - If we are going to interpret their contents, we let endstream
%     terminate the interpretation loop;
%   - If we are just going to read data from them, we impose
%     a SubFileDecode filter that reads just the requisite amount of data.
% When the unfiltered contents are held in a string rather than a file, a
% SubFileDecode filter (count 0, empty EOD string) is used to turn the
% string into a readable stream.
% Note that, in general, resolving a stream repositions PDFfile.
% Clients must save and restore the position of PDFfile themselves.
/resolvestream          % <streamdict> <readdata?> resolvestream <stream>
{ % If the data live in a file, position that file at the start of the data.
  exch dup /FilePosition .knownget
  { 1 index /File get exch setfileposition }
  if
  % Stack: readdata? dict
  dup /DecodeParms .knownget not { null } if
  1 index /Filter .knownget not { {} } if
  % A single filter name is wrapped in a one-element array, and so are its
  % parameters when there are any, so that both can be walked in parallel.
  dup type /nametype eq
  { 1 array astore
    1 index null ne { exch 1 array astore exch } if
  }
  if
  % Stack: readdata? dict parms filternames
  2 index /File get exch
  % Stack: readdata? dict parms file/string filternames
  dup length 0 eq
  { % All the PDF filters have EOD markers, but in this case
    % there is no specified filter.
    pop exch pop
    % Stack: readdata? dict file/string
    2 index
    { % We're going to read data; use a SubFileDecode filter
      % limited to /Length bytes.
      1 index /Length oget () /SubFileDecode filter
    }
    { dup type /filetype ne
      { % Use a SubFileDecode filter to read from a string.
        % The filter name must be a literal name operand of 'filter'.
        0 () /SubFileDecode filter
      }
      if
    }
    ifelse
  }
  { % Apply the filters in order.  Without parameters plain 'filter' is
    % enough; otherwise each step takes the first remaining parameter
    % entry (omitted if null) and then drops it from the parms array.
    2 index null eq
    { { filter }
    }
    { % Stack: readdata? dict parms file/string filtername
      { 2 index 0 get dup null eq { pop } { exch } ifelse filterpdf
        exch dup length 1 sub 1 exch getinterval exch
      }
    }
    ifelse forall exch pop
  }
  ifelse
  % Stack: readdata? dict file
  exch pop exch pop
} bind def
% endstream exits the enclosing loop.  Per the notes on resolvestream, this
% is what terminates interpretation of the contents of an unfiltered stream.
/endstream { exit } def
% Construct a PDF filter.  PDF filters are the same as PostScript filters,
% with one exception: an LZWDecode filter with Predictor=2 must have a
% PixelDifferenceDecode filter inserted after it in the pipeline.
/filterpdf              % <source> <...params...> <name> filterpdf <stream>
{ dup /LZWDecode ne 2 index type /dicttype ne or
  { % Anything other than LZWDecode with a parameter dictionary.
    filter
  }
  { % Select the action by Predictor - 1 (Predictor defaults to 1):
    % entry 0 is a plain filter, entry 1 also passes the parameter
    % dictionary to a PixelDifferenceDecode filter layered on top.
    1 index /Predictor .knownget not { 1 } if 1 sub
    { { filter }
      { 1 index 4 1 roll filter exch /PixelDifferenceDecode filter }
    }
    exch get exec
  }
  ifelse
} bind def
end % pdfdict
% Restore the global flag that .currentglobal left on the operand stack
% at the top of this file.
.setglobal
|