File: PostScript.pm

package info (click to toggle)
libimage-exiftool-perl 7.30-1
  • links: PTS
  • area: main
  • in suites: lenny
  • size: 8,436 kB
  • ctags: 1,652
  • sloc: perl: 72,274; sh: 62; makefile: 44
file content (478 lines) | stat: -rw-r--r-- 16,467 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
#------------------------------------------------------------------------------
# File:         PostScript.pm
#
# Description:  Read PostScript meta information
#
# Revisions:    07/08/05 - P. Harvey Created
#
# References:   1) http://partners.adobe.com/public/developer/en/ps/5002.EPSF_Spec.pdf
#               2) http://partners.adobe.com/public/developer/en/ps/5001.DSC_Spec.pdf
#               3) http://partners.adobe.com/public/developer/en/illustrator/sdk/AI7FileFormat.pdf
#------------------------------------------------------------------------------

package Image::ExifTool::PostScript;

use strict;
use vars qw($VERSION $AUTOLOAD);
use Image::ExifTool qw(:DataAccess :Utils);

$VERSION = '1.20';

sub WritePS($$);

# PostScript tag table
%Image::ExifTool::PostScript::Main = (
    WRITE_PROC => \&WritePS,
    PREFERRED => 1, # always add these tags when writing
    GROUPS => { 2 => 'Image' },
    # Note: Make all of these tags priority 0 since the first one found at
    # the start of the file should take priority (in case multiples exist)
    Author      => { Priority => 0, Groups => { 2 => 'Author' }, Writable => 'string' },
    BoundingBox => { Priority => 0 },
    Copyright   => { Priority => 0, Writable => 'string' }, #2
    CreationDate => {
        Name => 'CreateDate',
        Priority => 0,
        Groups => { 2 => 'Time' },
        Writable => 'string',
    },
    Creator     => { Priority => 0, Writable => 'string' },
    ImageData   => { Priority => 0 },
    For         => { Priority => 0, Writable => 'string', Notes => 'for whom the document was prepared'},
    Keywords    => { Priority => 0, Writable => 'string' },
    ModDate => {
        Name => 'ModifyDate',
        Priority => 0,
        Groups => { 2 => 'Time' },
        Writable => 'string',
    },
    Pages       => { Priority => 0 },
    Routing     => { Priority => 0, Writable => 'string' }, #2
    Subject     => { Priority => 0, Writable => 'string' },
    Title       => { Priority => 0, Writable => 'string' },
    Version     => { Priority => 0, Writable => 'string' }, #2
    # these subdirectories for documentation only
    BeginPhotoshop => {
        Name => 'PhotoshopData',
        SubDirectory => {
            TagTable => 'Image::ExifTool::Photoshop::Main',
        },
    },
    BeginICCProfile => {
        Name => 'ICC_Profile',
        SubDirectory => {
            TagTable => 'Image::ExifTool::ICC_Profile::Main',
        },
    },
    begin_xml_packet => {
        Name => 'XMP',
        SubDirectory => {
            TagTable => 'Image::ExifTool::XMP::Main',
        },
    },
    TIFFPreview => {
        Binary => 1,
        Notes => q{
            not a real tag ID, but used to represent the TIFF preview extracted from DOS
            EPS images
        },
    },
);

# composite tags
%Image::ExifTool::PostScript::Composite = (
    GROUPS => { 2 => 'Image' },
    # BoundingBox is in points, not pixels,
    # but use it anyway if ImageData is not available
    ImageWidth => {
        Desire => {
            0 => 'PostScript:ImageData',
            1 => 'PostScript:BoundingBox',
        },
        ValueConv => 'Image::ExifTool::PostScript::ImageSize(\@val, 0)',
    },
    ImageHeight => {
        Desire => {
            0 => 'PostScript:ImageData',
            1 => 'PostScript:BoundingBox',
        },
        ValueConv => 'Image::ExifTool::PostScript::ImageSize(\@val, 1)',
    },
);

# add our composite tags
Image::ExifTool::AddCompositeTags('Image::ExifTool::PostScript');

#------------------------------------------------------------------------------
# AutoLoad our writer routines when necessary
#
sub AUTOLOAD
{
    return Image::ExifTool::DoAutoLoad($AUTOLOAD, @_);
}

#------------------------------------------------------------------------------
# Get image width or height
# Inputs: 0) value list ref (ImageData, BoundingBox), 1) true to get height
sub ImageSize($$)
{
    my ($vals, $getHeight) = @_;
    my ($w, $h);
    if ($$vals[0] and $$vals[0] =~ /^(\d+) (\d+)/) {
        ($w, $h) = ($1, $2);
    } elsif ($$vals[1] and $$vals[1] =~ /^(\d+) (\d+) (\d+) (\d+)/) {
        ($w, $h) = ($3 - $1, $4 - $2);
    }
    return $getHeight ? $h : $w;
}

#------------------------------------------------------------------------------
# Set PostScript format error warning
# Inputs: 0) ExifTool object reference, 1) error string
# Returns: 1
sub PSErr($$)
{
    my ($exifTool, $str) = @_;
    # set file type if not done already
    $exifTool->SetFileType('PS') unless $exifTool->GetValue('FileType');
    $exifTool->Warn("PostScript format error ($str)");
    return 1;
}

#------------------------------------------------------------------------------
# set $/ according to the current file
# Inputs: 0) RAF reference
# Returns: Original separator or undefined if on error
sub SetInputRecordSeparator($)
{
    my $raf = shift;
    my $oldsep = $/;
    my $pos = $raf->Tell(); # save current position
    my $data;
    $raf->Read($data,256) or return undef;
    my ($a, $d) = (999,999);
    $a = pos($data), pos($data) = 0 if $data =~ /\x0a/g;
    $d = pos($data) if $data =~ /\x0d/g;
    my $diff = $a - $d;
    if ($diff eq 1) {
        $/ = "\x0d\x0a";
    } elsif ($diff eq -1) {
        $/ = "\x0a\x0d";
    } elsif ($diff > 0) {
        $/ = "\x0d";
    } elsif ($diff < 0) {
        $/ = "\x0a";
    } else {
        return undef;       # error
    }
    $raf->Seek($pos, 0);    # restore original position
    return $oldsep;
}

#------------------------------------------------------------------------------
# Decode comment from PostScript file
# Inputs: 0) comment string, 1) RAF ref, 2) reference to lines array
#         3) optional data reference for extra lines read from file
# Returns: Decoded comment string (may be an array reference)
# - handles multi-line comments and escape sequences
sub DecodeComment($$$;$)
{
    my ($val, $raf, $lines, $dataPt) = @_;
    $val =~ s/\x0d*\x0a*$//;        # remove trailing CR, LF or CR/LF
    # check for continuation comments
    for (;;) {
        unless (@$lines) {
            my $buff;
            $raf->ReadLine($buff) or last;
            my $altnl = $/ eq "\x0d" ? "\x0a" : "\x0d";
            if ($buff =~ /$altnl/) {
                # split into separate lines
                @$lines = split /$altnl/, $buff, -1;
                # handle case of DOS newline data inside file using Unix newlines
                @$lines = ( $$lines[0] . $$lines[1] ) if @$lines == 2 and $$lines[1] eq $/;
            } else {
                push @$lines, $buff;
            }
        }
        last unless $$lines[0] =~ s/^%%\+//;    # is the next line a continuation?
        $$dataPt .= "%%+$$lines[0]" if $dataPt; # add to data if necessary
        $$lines[0] =~ s/\x0d*\x0a*$//;          # remove trailing CR, LF or CR/LF
        $val .= shift @$lines;
    }
    my @vals;
    # handle bracketed string values
    if ($val =~ s/^\((.*)\)$/$1/) { # remove brackets if necessary
        # split into an array of strings if necessary
        my $nesting = 1;
        while ($val =~ /(\(|\))/g) {
            my $bra = $1;
            my $pos = pos($val) - 2;
            my $backslashes = 0;
            while ($pos and substr($val, $pos, 1) eq '\\') {
                --$pos;
                ++$backslashes;
            }
            next if $backslashes & 0x01;    # escaped if odd number
            if ($bra eq '(') {
                ++$nesting;
            } else {
                --$nesting;
                unless ($nesting) {
                    push @vals, substr($val, 0, pos($val)-1);
                    $val = substr($val, pos($val));
                    ++$nesting if $val =~ s/\s*\(//;
                }
            }
        }
        push @vals, $val;
        foreach $val (@vals) {
            # decode escape sequences in bracketed strings
            # (similar to code in PDF.pm, but without line continuation)
            while ($val =~ /\\(.)/sg) {
                my $n = pos($val) - 2;
                my $c = $1;
                my $r;
                if ($c =~ /[0-7]/) {
                    # get up to 2 more octal digits
                    $c .= $1 if $val =~ /\G([0-7]{1,2})/g;
                    # convert octal escape code
                    $r = chr(oct($c) & 0xff);
                } else {
                    # convert escaped characters
                    ($r = $c) =~ tr/nrtbf/\n\r\t\b\f/;
                }
                substr($val, $n, length($c)+1) = $r;
                # continue search after this character
                pos($val) = $n + length($r);
            }
        }
        $val = @vals > 1 ? \@vals : $vals[0];
    }
    return $val;
}

#------------------------------------------------------------------------------
# Extract information from EPS, PS or AI file
# Inputs: 0) ExifTool object reference, 1) dirInfo reference
# Returns: 1 if this was a valid PostScript file
sub ProcessPS($$)
{
    my ($exifTool, $dirInfo) = @_;
    my $raf = $$dirInfo{RAF};
    my ($data, $dos);
#
# determine if this is a postscript file
#
    $raf->Read($data, 4) == 4 or return 0;
    # accept either ASCII or DOS binary postscript file format
    return 0 unless $data =~ /^(%!PS|%!Ad|\xc5\xd0\xd3\xc6)/;
    if ($data =~ /^%!Ad/) {
        # I've seen PS files start with "%!Adobe-PS"...
        return 0 unless $raf->Read($data, 6) == 6 and $data eq "obe-PS";
    } elsif ($data =~ /^\xc5\xd0\xd3\xc6/) {
        # process DOS binary file header
        # - save DOS header then seek ahead and check PS header
        $raf->Read($dos, 26) == 26 or return 0;
        SetByteOrder('II');
        unless ($raf->Seek(Get32u(\$dos, 0), 0) and
                $raf->Read($data, 4) == 4 and $data eq '%!PS')
        {
            return PSErr($exifTool, 'invalid header');
        }
    }
#
# set the newline type based on the first newline found in the file
#
    my $oldsep = SetInputRecordSeparator($raf);
    $oldsep or return PSErr($exifTool, 'invalid PS data');

    # set file type (PostScript or EPS)
    $raf->ReadLine($data) or return 0;
    $exifTool->SetFileType($data =~ /EPSF/ ? 'EPS' : 'PS');
#
# extract TIFF information from DOS header
#
    my $tagTablePtr = GetTagTable('Image::ExifTool::PostScript::Main');
    if ($dos) {
        my $base = Get32u(\$dos, 16);
        if ($base) {
            my $pos = $raf->Tell();
            # extract the TIFF preview
            my $len = Get32u(\$dos, 20);
            my $val = $exifTool->ExtractBinary($base, $len, 'TIFFPreview');
            if (defined $val and $val =~ /^(MM\0\x2a|II\x2a\0|Binary)/) {
                $exifTool->HandleTag($tagTablePtr, 'TIFFPreview', $val);
            } else {
                $exifTool->Warn('Bad TIFF preview image');
            }
            # extract information from TIFF in DOS header
            # (set Parent to '' to avoid setting FileType tag again)
            my %dirInfo = (
                Parent => '',
                RAF => $raf,
                Base => $base,
            );
            $exifTool->ProcessTIFF(\%dirInfo) or $exifTool->Warn('Bad embedded TIFF');
            # position file pointer to extract PS information
            $raf->Seek($pos, 0);
        }
    }
#
# parse the postscript
#
    my ($buff, $mode, $endToken);
    my (@lines, $altnl);
    if ($/ eq "\x0d") {
        $altnl = "\x0a";
    } else {
        $/ = "\x0a";        # end on any LF (even if DOS CR+LF)
        $altnl = "\x0d";
    }
    for (;;) {
        if (@lines) {
            $data = shift @lines;
        } else {
            $raf->ReadLine($data) or last;
            # check for alternate newlines as efficiently as possible
            if ($data =~ /$altnl/) {
                # split into separate lines
                @lines = split /$altnl/, $data, -1;
                $data = shift @lines;
                if (@lines == 1 and $lines[0] eq $/) {
                    # handle case of DOS newline data inside file using Unix newlines
                    $data .= $lines[0];
                    undef @lines;
                }
            }
        }
        if ($mode) {
            if (not $endToken) {
                $buff .= $data;
                next unless $data =~ m{<\?xpacket end=.(w|r).\?>($/|$)};
            } elsif ($data !~ /^$endToken/i) {
                if ($mode eq 'XMP') {
                    $buff .= $data;
                } elsif ($mode eq 'Document') {
                    # ignore embedded documents 
                } else {
                    # data is ASCII-hex encoded
                    $data =~ tr/0-9A-Fa-f//dc;  # remove all but hex characters
                    $buff .= pack('H*', $data); # translate from hex
                }
                next;
            }
        } elsif ($data =~ /^(%{1,2})(Begin)(_xml_packet|Photoshop|ICCProfile|Binary)/i) {
            # the beginning of a data block
            my %modeLookup = (
                _xml_packet => 'XMP',
                iccprofile  => 'ICC_Profile',
                photoshop   => 'Photoshop',
            );
            $mode = $modeLookup{lc($3)};
            unless ($mode or @lines) {
                # skip binary data
                $raf->Seek($1, 1) or last if $data =~ /^%{1,2}BeginBinary:\s*(\d+)/i;
                next;
            }
            $buff = '';
            $endToken = $1 . ($2 eq 'begin' ? 'end' : 'End') . $3;
            next;
        } elsif ($data =~ /^(%{1,2})(Begin)(Document)/i) {
            $mode = 'Document';
            next;
        } elsif ($data =~ /^<\?xpacket begin=.{7,13}W5M0MpCehiHzreSzNTczkc9d/) {
            # pick up any stray XMP data
            $mode = 'XMP';
            $buff = $data;
            undef $endToken;    # no end token (just look for xpacket end)
            # XMP could be contained in a single line (if newlines are different)
            next unless $data =~ m{<\?xpacket end=.(w|r).\?>($/|$)};
        } elsif ($data =~ /^%%?(\w+): ?(.*)/s and $$tagTablePtr{$1}) {
            my ($tag, $val) = ($1, $2);
            # only allow 'ImageData' to have single leading '%'
            next unless $data =~ /^%%/ or $1 eq 'ImageData';
            # decode comment string (reading continuation lines if necessary)
            $val = DecodeComment($val, $raf, \@lines);
            $exifTool->HandleTag($tagTablePtr, $tag, $val);
            next;
        } else {
            next;
        }
        # extract information from buffered data
        if ($mode ne 'Document'){
            my %dirInfo = (
                DataPt => \$buff,
                DataLen => length $buff,
                DirStart => 0,
                DirLen => length $buff,
                Parent => 'PostScript',
            );
            my $subTablePtr = GetTagTable("Image::ExifTool::${mode}::Main");
            unless ($exifTool->ProcessDirectory(\%dirInfo, $subTablePtr)) {
                $exifTool->Warn("Error processing $mode information in PostScript file");
            }
            undef $buff;
        }
        undef $mode;
    }
    $/ = $oldsep;   # restore original separator
    $mode and $mode ne 'Document' and PSErr($exifTool, "unterminated $mode data");
    return 1;
}

#------------------------------------------------------------------------------
# Extract information from EPS file
# Inputs: 0) ExifTool object reference, 1) dirInfo reference
# Returns: 1 if this was a valid PostScript file
sub ProcessEPS($$)
{
    return ProcessPS($_[0],$_[1]);
}

1; # end


__END__

=head1 NAME

Image::ExifTool::PostScript - Read PostScript meta information

=head1 SYNOPSIS

This module is loaded automatically by Image::ExifTool when required.

=head1 DESCRIPTION

This code reads meta information from EPS (Encapsulated PostScript), PS
(PostScript) and AI (Adobe Illustrator) files.

=head1 NOTES

Currently doesn't handle continued lines ("%+" syntax).

=head1 AUTHOR

Copyright 2003-2008, Phil Harvey (phil at owl.phy.queensu.ca)

This library is free software; you can redistribute it and/or modify it
under the same terms as Perl itself.

=head1 REFERENCES

=over 4

=item L<http://partners.adobe.com/public/developer/en/ps/5002.EPSF_Spec.pdf>

=item L<http://partners.adobe.com/public/developer/en/ps/5001.DSC_Spec.pdf>

=item L<http://partners.adobe.com/public/developer/en/illustrator/sdk/AI7FileFormat.pdf>

=back

=head1 SEE ALSO

L<Image::ExifTool::TagNames/PostScript Tags>,
L<Image::ExifTool(3pm)|Image::ExifTool>

=cut