File: EpsReader.java

package info (click to toggle)
libmetadata-extractor-java 2.11.0-1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, bullseye, buster, forky, sid, trixie
  • size: 6,416 kB
  • sloc: java: 35,343; xml: 200; sh: 11; makefile: 2
file content (395 lines) | stat: -rw-r--r-- 15,425 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
package com.drew.metadata.eps;

import com.drew.imaging.tiff.TiffProcessingException;
import com.drew.imaging.tiff.TiffReader;
import com.drew.lang.*;
import com.drew.lang.annotations.NotNull;
import com.drew.lang.annotations.Nullable;
import com.drew.metadata.Metadata;
import com.drew.metadata.icc.IccReader;
import com.drew.metadata.photoshop.PhotoshopReader;
import com.drew.metadata.photoshop.PhotoshopTiffHandler;
import com.drew.metadata.xmp.XmpReader;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;

/**
 * Reads file passed in through SequentialReader and parses encountered data:
 * <ul>
 *     <li>Basic EPS Comments</li>
 *     <li>EXIF</li>
 *     <li>Photoshop</li>
 *     <li>IPTC</li>
 *     <li>ICC Profile</li>
 *     <li>XMP</li>
 * </ul>
 * EPS comments are retrieved from EPS directory.  Photoshop, ICC Profile, and XMP processing
 * is passed to their respective reader.
 * <p>
 * EPS Constraints (Source: https://www-cdf.fnal.gov/offline/PostScript/5001.PDF pg.18):
 * <ul>
 *     <li>Max line length is 255 characters</li>
 *     <li>Lines end with a CR(0xD) or LF(0xA) character (or both, in practice)</li>
 *     <li>':' separates keywords (considered part of the keyword)</li>
 *     <li>Whitespace is either a space(0x20) or tab(0x9)</li>
 *     <li>If there is more than one header, the 1st is truth</li>
 * </ul>
 *
 * @author Payton Garland
 */
public class EpsReader
{
    /**
     * Tag of the comment most recently stored in the directory, used as the target of continuation
     * comments. Holds <code>0</code> (a value that is not a tag in {@link EpsDirectory}) when the most recent
     * comment was not stored and therefore must not be continued.
     */
    private int _previousTag;

    /**
     * Filter method that determines if file will contain an EPS Header.  If it does, it will read the necessary
     * data and then set the position to the beginning of the PostScript data.  If it does not, the position will not
     * be changed.  After both scenarios, the main extract method is called.
     * <p>
     * An {@link EpsDirectory} is always added to <code>metadata</code>, even when the file type is not recognised
     * (in which case the directory only carries an error).
     *
     * @param inputStream InputStream containing file
     * @param metadata Metadata to add directory to and extracted data
     * @throws IOException if the underlying stream or reader fails
     */
    public void extract(@NotNull final InputStream inputStream, @NotNull final Metadata metadata) throws IOException
    {
        RandomAccessStreamReader reader = new RandomAccessStreamReader(inputStream);
        EpsDirectory directory = new EpsDirectory();
        metadata.addDirectory(directory);

        /*
         * 0xC5D0D3C6 signifies an EPS Header block which contains 32-bytes of basic information
         *
         * 0x25215053 (%!PS) signifies an EPS File and leads straight into the PostScript
         */
        switch (reader.getInt32(0)) {
            case 0xC5D0D3C6:
                // The remaining header fields are read with Motorola byte order disabled
                reader.setMotorolaByteOrder(false);
                int postScriptOffset = reader.getInt32(4);
                int postScriptLength = reader.getInt32(8);
                int wmfOffset = reader.getInt32(12);
                int wmfSize = reader.getInt32(16);
                int tifOffset = reader.getInt32(20);
                int tifSize = reader.getInt32(24);
                //int checkSum = reader.getInt32(28);

                // Get Tiff/WMF preview data if applicable
                if (tifSize != 0) {
                    directory.setInt(EpsDirectory.TAG_TIFF_PREVIEW_SIZE, tifSize);
                    directory.setInt(EpsDirectory.TAG_TIFF_PREVIEW_OFFSET, tifOffset);
                    // Get Tiff metadata
                    try {
                        ByteArrayReader byteArrayReader = new ByteArrayReader(reader.getBytes(tifOffset, tifSize));
                        new TiffReader().processTiff(byteArrayReader, new PhotoshopTiffHandler(metadata, null), 0);
                    } catch (TiffProcessingException ex) {
                        // A broken preview must not prevent the PostScript comments from being read
                        directory.addError("Unable to process TIFF data: " + ex.getMessage());
                    }
                } else if (wmfSize != 0) {
                    directory.setInt(EpsDirectory.TAG_WMF_PREVIEW_SIZE, wmfSize);
                    directory.setInt(EpsDirectory.TAG_WMF_PREVIEW_OFFSET, wmfOffset);
                }

                // TODO avoid allocating byte array here -- read directly from InputStream
                extract(directory, metadata, new SequentialByteArrayReader(reader.getBytes(postScriptOffset, postScriptLength)));
                break;
            case 0x25215053:
                // NOTE(review): the magic number above was read through 'reader', which consumed bytes from
                // inputStream. reset() only rewinds if the stream supports mark/reset and a mark is still valid;
                // no mark() call is visible here -- confirm that callers provide a suitably marked stream.
                inputStream.reset();
                extract(directory, metadata, new StreamReader(inputStream));
                break;
            default:
                directory.addError("File type not supported.");
                break;
        }
    }

    /**
     * Main method that parses all comments and then distributes data extraction among other methods that parse the
     * rest of file and store encountered data in metadata (if there exists an entry in EpsDirectory
     * for the found data).  Will extract data from normal EPS comments, Photoshop, ICC, and XMP.
     * <p>
     * Lines are read one at a time.  The loop ends at the first non-empty line that does not start with
     * <code>'%'</code>.  Any {@link IOException} raised by the reader while reading is propagated to the caller.
     *
     * @param directory EpsDirectory that receives the values of recognised comments
     * @param metadata Metadata to add directory to and extracted data
     * @param reader source of the PostScript bytes, positioned at the first line to parse
     * @throws IOException if the reader fails
     */
    private void extract(@NotNull final EpsDirectory directory, @NotNull Metadata metadata, @NotNull SequentialReader reader) throws IOException
    {
        StringBuilder line = new StringBuilder();

        while (true) {
            line.setLength(0);

            // Read the next line, excluding any trailing newline character
            // Note that for Windows-style line endings ("\r\n") the outer loop will be run a second time with an empty
            // string, which is fine.
            while (true) {
                // Mask to 0..255: a plain (char) cast would sign-extend bytes >= 0x80 into U+FF80..U+FFFF
                // and corrupt any non-ASCII comment text.
                char c = (char)(reader.getByte() & 0xFF);
                if (c == '\r' || c == '\n')
                    break;
                line.append(c);
            }

            // Stop when we hit a line that is not a comment
            if (line.length() != 0 && line.charAt(0) != '%')
                break;

            String name;

            // ':' signifies there is an associated keyword (should be put in directory)
            // otherwise, the name could be a marker
            int colonIndex = line.indexOf(":");
            if (colonIndex != -1) {
                name = line.substring(0, colonIndex).trim();
                String value = line.substring(colonIndex + 1).trim();
                addToDirectory(directory, name, value);
            } else {
                name = line.toString().trim();
            }

            // Some comments will both have a value and signify a new block to follow
            if (name.equals("%BeginPhotoshop")) {
                extractPhotoshopData(metadata, reader);
            } else if (name.equals("%%BeginICCProfile")) {
                extractIccData(metadata, reader);
            } else if (name.equals("%begin_xml_packet")) {
                extractXmpData(metadata, reader);
            }
        }
    }

    /**
     * Default case that adds comment with keyword to directory.
     * <p>
     * Comments whose name is not present in <code>EpsDirectory._tagIntegerMap</code> are ignored.  For a given tag
     * only the first occurrence is stored; later duplicates are dropped and also disable continuation, so that a
     * following continuation comment cannot be appended to the value stored for the first occurrence.
     *
     * @param directory EpsDirectory to add extracted data to
     * @param name String that holds name of current comment
     * @param value String that holds value of current comment
     * @throws IOException declared for the benefit of the image data parsing this method delegates to
     */
    private void addToDirectory(@NotNull final EpsDirectory directory, String name, String value) throws IOException
    {
        Integer tag = EpsDirectory._tagIntegerMap.get(name);

        if (tag == null)
            return;

        switch (tag) {
            case EpsDirectory.TAG_IMAGE_DATA:
                extractImageData(directory, value);
                _previousTag = tag;
                break;
            case EpsDirectory.TAG_CONTINUE_LINE:
                // Append to the previously stored comment, but only if there is one: appending to a missing
                // value would store the literal text "null".
                if (_previousTag != 0) {
                    String previousValue = directory.getString(_previousTag);
                    if (previousValue != null)
                        directory.setString(_previousTag, previousValue + " " + value);
                }
                // _previousTag is deliberately left unchanged so that several consecutive continuation
                // comments all extend the same value.
                break;
            default:
                if (EpsDirectory._tagNameMap.containsKey(tag) && !directory.containsTag(tag)) {
                    directory.setString(tag, value);
                    _previousTag = tag;
                } else {
                    // Set previous tag to an Integer that doesn't exist in EpsDirectory
                    _previousTag = 0;
                }
                break;
        }
    }

    /**
     * Parses <code>%ImageData</code> comment which holds several values including width in px,
     * height in px and color type.
     * <p>
     * The raw value is always stored under {@link EpsDirectory#TAG_IMAGE_DATA}.  Width, height, color type and
     * RAM size are only stored when not already present.  If the value has fewer than four fields or a required
     * field is not an integer, an error is recorded on the directory and no derived values are stored.
     *
     * @param directory EpsDirectory to add extracted data to
     * @param imageData the text following <code>%ImageData:</code>
     */
    private static void extractImageData(@NotNull final EpsDirectory directory, String imageData) throws IOException
    {
        // %ImageData: 1000 1000 8 3 1 1000 7 "beginimage"
        String trimmed = imageData.trim();
        directory.setString(EpsDirectory.TAG_IMAGE_DATA, trimmed);

        // Split on runs of whitespace so repeated spaces or tabs do not produce empty fields
        String[] imageDataParts = trimmed.split("\\s+");

        if (imageDataParts.length < 4) {
            directory.addError("Unable to parse %ImageData: expected at least 4 values but found " + imageDataParts.length);
            return;
        }

        int width;
        int height;
        int colorType;
        try {
            width = Integer.parseInt(imageDataParts[0]);
            height = Integer.parseInt(imageDataParts[1]);
            colorType = Integer.parseInt(imageDataParts[3]);
        } catch (NumberFormatException ex) {
            directory.addError("Unable to parse %ImageData: " + ex.getMessage());
            return;
        }

        // Only add values that are not already present
        if (!directory.containsTag(EpsDirectory.TAG_IMAGE_WIDTH))
            directory.setInt(EpsDirectory.TAG_IMAGE_WIDTH, width);
        if (!directory.containsTag(EpsDirectory.TAG_IMAGE_HEIGHT))
            directory.setInt(EpsDirectory.TAG_IMAGE_HEIGHT, height);
        if (!directory.containsTag(EpsDirectory.TAG_COLOR_TYPE))
            directory.setInt(EpsDirectory.TAG_COLOR_TYPE, colorType);

        if (!directory.containsTag(EpsDirectory.TAG_RAM_SIZE)) {
            int bytesPerPixel = 0;
            if (colorType == 1)
                bytesPerPixel = 1; // grayscale
            else if (colorType == 2 || colorType == 3)
                bytesPerPixel = 3; // Lab or RGB
            else if (colorType == 4)
                bytesPerPixel = 3; // CMYK -- NOTE(review): CMYK has four channels; confirm whether 4 is intended

            if (bytesPerPixel != 0) {
                // Compute in long and skip the tag when the result is negative or does not fit in an int,
                // rather than storing a wrapped-around value.
                long pixelCount = (long)width * height;
                if (pixelCount >= 0 && pixelCount <= Integer.MAX_VALUE / bytesPerPixel)
                    directory.setInt(EpsDirectory.TAG_RAM_SIZE, (int)(bytesPerPixel * pixelCount));
            }
        }
    }

    /**
     * Decodes a commented hex section, and uses {@link PhotoshopReader} to decode the resulting data.
     * Nothing is extracted when the hex section cannot be decoded.
     */
    private static void extractPhotoshopData(@NotNull final Metadata metadata, @NotNull SequentialReader reader) throws IOException
    {
        byte[] buffer = decodeHexCommentBlock(reader);

        if (buffer != null)
            new PhotoshopReader().extract(new SequentialByteArrayReader(buffer), buffer.length, metadata);
    }

    /**
     * Decodes a commented hex section, and uses {@link IccReader} to decode the resulting data.
     * Nothing is extracted when the hex section cannot be decoded.
     */
    private static void extractIccData(@NotNull final Metadata metadata, @NotNull SequentialReader reader) throws IOException
    {
        byte[] buffer = decodeHexCommentBlock(reader);

        if (buffer != null)
            new IccReader().extract(new ByteArrayReader(buffer), metadata);
    }

    /**
     * Extracts an XMP xpacket, and uses {@link XmpReader} to decode the resulting data.
     * All bytes up to and including the <code>&lt;?xpacket end="w"?&gt;</code> trailer are decoded as UTF-8.
     */
    private static void extractXmpData(@NotNull final Metadata metadata, @NotNull SequentialReader reader) throws IOException
    {
        // Encode the sentinel explicitly instead of relying on the platform default charset
        byte[] bytes = readUntil(reader, "<?xpacket end=\"w\"?>".getBytes(Charsets.UTF_8));
        String xmp = new String(bytes, Charsets.UTF_8);
        new XmpReader().extract(xmp, metadata);
    }

    /**
     * Reads all bytes until the given sentinel is observed.
     * The sentinel will be included in the returned bytes.
     * <p>
     * After a partial match fails, matching restarts at the byte that caused the failure.  This is exact for
     * sentinels whose first byte does not occur again inside the sentinel; sentinels with a repeated prefix
     * would need a full prefix-function search.
     *
     * @param reader source of bytes
     * @param sentinel byte sequence that ends the read; an empty sentinel yields an empty result
     * @return every byte read, ending with the sentinel
     * @throws IOException if the reader fails before the sentinel is seen
     */
    private static byte[] readUntil(@NotNull SequentialReader reader, @NotNull byte[] sentinel) throws IOException
    {
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();

        final int length = sentinel.length;
        int depth = 0;

        while (depth != length) {
            byte b = reader.getByte();
            if (b == sentinel[depth])
                depth++;
            else
                // The mismatching byte may itself begin the sentinel (e.g. "<<?xpacket ...")
                depth = (b == sentinel[0]) ? 1 : 0;
            bytes.write(b);
        }

        return bytes.toByteArray();
    }

    /**
     * EPS files can contain hexadecimal-encoded ASCII blocks, each prefixed with {@code "% "}.
     * This method reads such a block and returns a byte[] of the decoded contents.
     * Reading stops at the first line that does not begin with {@code "% "}, which is discarded
     * (it's a terminator anyway).
     * <p>
     * For example:
     * <pre><code>
     * %BeginPhotoshop: 9564
     * % 3842494D040400000000005D1C015A00031B25471C0200000200041C02780004
     * % 6E756C6C1C027A00046E756C6C1C025000046E756C6C1C023700083230313630
     * % 3331311C023C000B3131343335362B303030301C023E00083230313630333131
     * % 48000000010000003842494D03FD0000000000080101000000000000
     * %EndPhotoshop
     * </code></pre>
     * When calling this method, the reader must be positioned at the start of the first line containing
     * hex data, not at the introductory line.
     *
     * @param reader source of bytes
     * @return The decoded bytes, or <code>null</code> if decoding failed.
     * @throws IOException if the reader fails
     */
    @Nullable
    private static byte[] decodeHexCommentBlock(@NotNull SequentialReader reader) throws IOException
    {
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();

        // Use a state machine to efficiently parse data in a single traversal

        final int AwaitingPercent = 0;
        final int AwaitingSpace = 1;
        final int AwaitingHex1 = 2;
        final int AwaitingHex2 = 3;

        int state = AwaitingPercent;

        // Value of the high nibble while waiting for the low nibble of the same byte
        int carry = 0;
        boolean done = false;

        byte b = 0;
        while (!done) {
            b = reader.getByte();

            switch (state) {
                case AwaitingPercent: {
                    switch (b) {
                        case '\r':
                        case '\n':
                        case ' ':
                            // skip newline chars and spaces
                            break;
                        case '%':
                            state = AwaitingSpace;
                            break;
                        default:
                            return null;
                    }
                    break;
                }
                case AwaitingSpace: {
                    switch (b) {
                        case ' ':
                            state = AwaitingHex1;
                            break;
                        default:
                            // '%' not followed by a space: this is the terminating line
                            done = true;
                            break;
                    }
                    break;
                }
                case AwaitingHex1: {
                    int i = tryHexToInt(b);
                    if (i != -1) {
                        carry = i * 16;
                        state = AwaitingHex2;
                    } else if (b == '\r' || b == '\n') {
                        state = AwaitingPercent;
                    } else {
                        return null;
                    }
                    break;
                }
                case AwaitingHex2: {
                    // A line may not end between the two digits of a byte
                    int i = tryHexToInt(b);
                    if (i == -1)
                        return null;
                    bytes.write(carry + i);
                    state = AwaitingHex1;
                    break;
                }
            }
        }

        // Skip through the remainder of the last line.  Either line terminator ends the line: waiting for
        // '\n' alone would swallow subsequent lines of files that use CR-only line endings.
        while (b != '\n' && b != '\r')
            b = reader.getByte();

        return bytes.toByteArray();
    }

    /**
     * Treats a byte as an ASCII character, and returns its numerical value in hexadecimal.
     * Both upper and lower case digits are accepted.
     * If conversion is not possible, returns -1.
     */
    private static int tryHexToInt(byte b)
    {
        if (b >= '0' && b <= '9')
            return b - '0';
        if (b >= 'A' && b <= 'F')
            return b - 'A' + 10;
        if (b >= 'a' && b <= 'f')
            return b - 'a' + 10;
        return -1;
    }
}