File: IptcReader.java

package info (click to toggle)
libmetadata-extractor-java 2.11.0-1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, bullseye, buster, forky, sid, trixie
  • size: 6,416 kB
  • sloc: java: 35,343; xml: 200; sh: 11; makefile: 2
file content (241 lines) | stat: -rw-r--r-- 9,785 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
/*
 * Copyright 2002-2017 Drew Noakes
 *
 *    Licensed under the Apache License, Version 2.0 (the "License");
 *    you may not use this file except in compliance with the License.
 *    You may obtain a copy of the License at
 *
 *        http://www.apache.org/licenses/LICENSE-2.0
 *
 *    Unless required by applicable law or agreed to in writing, software
 *    distributed under the License is distributed on an "AS IS" BASIS,
 *    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *    See the License for the specific language governing permissions and
 *    limitations under the License.
 *
 * More information about this project is available at:
 *
 *    https://drewnoakes.com/code/exif/
 *    https://github.com/drewnoakes/metadata-extractor
 */
package com.drew.metadata.iptc;

import com.drew.imaging.jpeg.JpegSegmentMetadataReader;
import com.drew.imaging.jpeg.JpegSegmentType;
import com.drew.lang.SequentialByteArrayReader;
import com.drew.lang.SequentialReader;
import com.drew.lang.annotations.NotNull;
import com.drew.lang.annotations.Nullable;
import com.drew.metadata.Directory;
import com.drew.metadata.Metadata;
import com.drew.metadata.StringValue;

import java.io.IOException;
import java.nio.charset.Charset;
import java.util.Collections;

/**
 * Decodes IPTC binary data, populating a {@link Metadata} object with tag values in an {@link IptcDirectory}.
 * <p>
 * The data is a sequence of tags. As read by this class, each tag consists of a marker byte
 * (<code>0x1c</code>), a one-byte directory (record) number, a one-byte tag type, a two-byte length
 * (optionally followed by two further length bytes when the high bit is set), and then the value bytes.
 * <p>
 * Problems found while decoding are recorded on the directory via {@link Directory#addError(String)}
 * rather than being thrown to the caller.
 * <p>
 * http://www.iptc.org/std/IIM/4.1/specification/IIMV4.1.pdf
 *
 * @author Drew Noakes https://drewnoakes.com
 */
public class IptcReader implements JpegSegmentMetadataReader
{
    // TODO consider breaking the IPTC section up into multiple directories and providing segregation of each IPTC directory
/*
    public static final int DIRECTORY_IPTC = 2;

    public static final int ENVELOPE_RECORD = 1;
    public static final int APPLICATION_RECORD_2 = 2;
    public static final int APPLICATION_RECORD_3 = 3;
    public static final int APPLICATION_RECORD_4 = 4;
    public static final int APPLICATION_RECORD_5 = 5;
    public static final int APPLICATION_RECORD_6 = 6;
    public static final int PRE_DATA_RECORD = 7;
    public static final int DATA_RECORD = 8;
    public static final int POST_DATA_RECORD = 9;
*/
    /** Value of the byte that must precede every IPTC tag. */
    private static final byte IPTC_MARKER_BYTE = 0x1c;

    /**
     * Identifies the JPEG segment types this reader wants to be given.
     *
     * @return a single-element collection containing {@link JpegSegmentType#APPD}
     */
    @NotNull
    public Iterable<JpegSegmentType> getSegmentTypes()
    {
        return Collections.singletonList(JpegSegmentType.APPD);
    }

    /**
     * Extracts IPTC data from each of the given segments that begins with the IPTC marker byte.
     * Empty segments, and segments starting with any other byte, are skipped silently.
     *
     * @param segments the raw bytes of each candidate segment
     * @param metadata the {@link Metadata} object to which extracted directories are added
     * @param segmentType the type of the segments being offered; not consulted by this implementation
     */
    public void readJpegSegments(@NotNull Iterable<byte[]> segments, @NotNull Metadata metadata, @NotNull JpegSegmentType segmentType)
    {
        for (byte[] segmentBytes : segments) {
            // Ensure data starts with the IPTC marker byte
            if (segmentBytes.length != 0 && segmentBytes[0] == IPTC_MARKER_BYTE) {
                extract(new SequentialByteArrayReader(segmentBytes), metadata, segmentBytes.length);
            }
        }
    }

    /**
     * Performs the IPTC data extraction, adding found values to the specified instance of {@link Metadata}.
     * <p>
     * Equivalent to calling the four-argument overload with a <code>null</code> parent directory.
     *
     * @param reader the source of IPTC bytes, positioned at the first tag
     * @param metadata the {@link Metadata} object to which the new {@link IptcDirectory} is added
     * @param length the number of bytes of IPTC data to consume from <code>reader</code>
     */
    public void extract(@NotNull final SequentialReader reader, @NotNull final Metadata metadata, long length)
    {
        extract(reader, metadata, length, null);
    }

    /**
     * Performs the IPTC data extraction, adding found values to the specified instance of {@link Metadata}.
     * <p>
     * A new {@link IptcDirectory} is always added to <code>metadata</code>, even if no tag can be read.
     * Decoding stops at the first malformed tag, and an error describing the problem is recorded on the
     * directory (except for a single stray trailing byte, which is tolerated silently).
     *
     * @param reader the source of IPTC bytes, positioned at the first tag
     * @param metadata the {@link Metadata} object to which the new {@link IptcDirectory} is added
     * @param length the number of bytes of IPTC data to consume from <code>reader</code>
     * @param parentDirectory the directory to set as the parent of the new directory, or <code>null</code> for none
     */
    public void extract(@NotNull final SequentialReader reader, @NotNull final Metadata metadata, long length, @Nullable Directory parentDirectory)
    {
        IptcDirectory directory = new IptcDirectory();
        metadata.addDirectory(directory);

        if (parentDirectory != null)
            directory.setParent(parentDirectory);

        // Held as a long so that adding a (potentially huge) tag length cannot wrap around
        // in 32-bit arithmetic and slip past the bounds checks against 'length'.
        long offset = 0;

        // for each tag
        while (offset < length) {

            // identifies start of a tag
            short startByte;
            try {
                startByte = reader.getUInt8();
                offset++;
            } catch (IOException e) {
                directory.addError("Unable to read starting byte of IPTC tag");
                return;
            }

            if (startByte != IPTC_MARKER_BYTE) {
                // NOTE have seen images where there was one extra byte at the end, giving
                // offset==length at this point, which is not worth logging as an error.
                if (offset != length)
                    directory.addError("Invalid IPTC tag marker at offset " + (offset - 1) + ". Expected '0x" + Integer.toHexString(IPTC_MARKER_BYTE) + "' but got '0x" + Integer.toHexString(startByte) + "'.");
                return;
            }

            // we need at least four bytes left to read a tag
            if (offset + 4 > length) {
                directory.addError("Too few bytes remain for a valid IPTC tag");
                return;
            }

            int directoryType;
            int tagType;
            int tagByteCount;
            try {
                directoryType = reader.getUInt8();
                tagType = reader.getUInt8();
                tagByteCount = reader.getUInt16();
                if (tagByteCount > 32767) {
                    // Extended DataSet Tag (see 1.5(c), p14, IPTC-IIMV4.2.pdf)
                    // The low 15 bits become the high half of the length and two more bytes supply the low half,
                    // so the result can be as large as 0x7FFFFFFF.
                    tagByteCount = ((tagByteCount & 0x7FFF) << 16) | reader.getUInt16();
                    offset += 2;
                }
                offset += 4;
            } catch (IOException e) {
                directory.addError("IPTC data segment ended mid-way through tag descriptor");
                return;
            }

            // 'offset' is a long, so this sum is evaluated in 64-bit arithmetic and cannot overflow
            if (offset + tagByteCount > length) {
                directory.addError("Data for tag extends beyond end of IPTC segment");
                return;
            }

            try {
                processTag(reader, directory, directoryType, tagType, tagByteCount);
            } catch (IOException e) {
                directory.addError("Error processing IPTC tag");
                return;
            }

            offset += tagByteCount;
        }
    }

    /**
     * Reads the value of a single tag from <code>reader</code> and stores it in <code>directory</code>.
     * <p>
     * The tag identifier is formed as <code>tagType | (directoryType &lt;&lt; 8)</code>. A small number of tags
     * are stored as integers, the coded character set tag is stored as a charset name, and everything else
     * is stored as a {@link StringValue}. When a string tag occurs more than once, the values are accumulated
     * into a {@link StringValue} array.
     *
     * @param reader the source of bytes, positioned at the first byte of the tag's value
     * @param directory the directory that receives the decoded value
     * @param directoryType the directory (record) number read from the tag header
     * @param tagType the tag type read from the tag header
     * @param tagByteCount the number of value bytes belonging to this tag
     * @throws IOException if the reader fails while supplying the value bytes
     */
    private void processTag(@NotNull SequentialReader reader, @NotNull Directory directory, int directoryType, int tagType, int tagByteCount) throws IOException
    {
        int tagIdentifier = tagType | (directoryType << 8);

        // Some images have been seen that specify a zero byte tag, which cannot be of much use.
        // There is nothing to read, so we record the tag with an empty string value. The IPTC
        // specification doesn't mention anything about the interpretation of this situation.
        // https://raw.githubusercontent.com/wiki/drewnoakes/metadata-extractor/docs/IPTC-IIMV4.2.pdf
        if (tagByteCount == 0) {
            directory.setString(tagIdentifier, "");
            return;
        }

        switch (tagIdentifier) {
            case IptcDirectory.TAG_CODED_CHARACTER_SET:
                byte[] bytes = reader.getBytes(tagByteCount);
                String charsetName = Iso2022Converter.convertISO2022CharsetToJavaCharset(bytes);
                if (charsetName == null) {
                    // Unable to determine the charset, so store the raw bytes as a plain string instead.
                    // NOTE(review): this decodes using the platform default charset, so the stored value
                    // may differ between machines -- confirm whether a fixed charset would be preferable.
                    charsetName = new String(bytes);
                }
                directory.setString(tagIdentifier, charsetName);
                return;
            case IptcDirectory.TAG_ENVELOPE_RECORD_VERSION:
            case IptcDirectory.TAG_APPLICATION_RECORD_VERSION:
            case IptcDirectory.TAG_FILE_VERSION:
            case IptcDirectory.TAG_ARM_VERSION:
            case IptcDirectory.TAG_PROGRAM_VERSION:
                // short
                if (tagByteCount >= 2) {
                    int shortValue = reader.getUInt16();
                    // discard any surplus bytes so the reader stays aligned with the next tag
                    reader.skip(tagByteCount - 2);
                    directory.setInt(tagIdentifier, shortValue);
                    return;
                }
                // too short to hold a 16-bit value: handle it as a string below
                break;
            case IptcDirectory.TAG_URGENCY:
                // byte
                directory.setInt(tagIdentifier, reader.getUInt8());
                // discard any surplus bytes so the reader stays aligned with the next tag
                reader.skip(tagByteCount - 1);
                return;
            default:
                // fall through
        }

        // If we haven't returned yet, treat it as a string
        // NOTE that there's a chance we've already loaded the value as a string above, but failed to parse the value
        String declaredCharsetName = directory.getString(IptcDirectory.TAG_CODED_CHARACTER_SET);
        Charset charset = null;
        if (declaredCharsetName != null) {
            try {
                charset = Charset.forName(declaredCharsetName);
            } catch (IllegalArgumentException ignored) {
                // The stored name is illegal or not supported on this JVM (both failures are subclasses of
                // IllegalArgumentException). Decoding is best-effort, so continue with a null charset.
            }
        }

        StringValue string;
        if (declaredCharsetName != null) {
            // A character set was declared for this data, so use it (it may be null if it could not be resolved)
            string = reader.getStringValue(tagByteCount, charset);
        } else {
            // No character set was declared, so guess one from the bytes; the guess may be null
            byte[] valueBytes = reader.getBytes(tagByteCount);
            string = new StringValue(valueBytes, Iso2022Converter.guessCharSet(valueBytes));
        }

        if (directory.containsTag(tagIdentifier)) {
            // this fancy StringValue[] business avoids using an ArrayList for performance reasons
            StringValue[] oldStrings = directory.getStringValueArray(tagIdentifier);
            StringValue[] newStrings;
            if (oldStrings == null) {
                // TODO hitting this block means any prior value(s) are discarded
                newStrings = new StringValue[1];
            } else {
                newStrings = new StringValue[oldStrings.length + 1];
                System.arraycopy(oldStrings, 0, newStrings, 0, oldStrings.length);
            }
            newStrings[newStrings.length - 1] = string;
            directory.setStringValueArray(tagIdentifier, newStrings);
        } else {
            directory.setStringValue(tagIdentifier, string);
        }
    }
}