1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386
|
/*
* Copyright 2002-2017 Drew Noakes
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* More information about this project is available at:
*
* https://drewnoakes.com/code/exif/
* https://github.com/drewnoakes/metadata-extractor
*/
package com.drew.imaging.tiff;
import com.drew.lang.RandomAccessReader;
import com.drew.lang.Rational;
import com.drew.lang.annotations.NotNull;
import java.io.IOException;
import java.util.HashSet;
import java.util.Set;
/**
* Processes TIFF-formatted data, calling into client code via that {@link TiffHandler} interface.
*
* @author Drew Noakes https://drewnoakes.com
*/
public class TiffReader
{
/**
* Processes a TIFF data sequence.
*
* @param reader the {@link RandomAccessReader} from which the data should be read
* @param handler the {@link TiffHandler} that will coordinate processing and accept read values
* @param tiffHeaderOffset the offset within <code>reader</code> at which the TIFF header starts
* @throws TiffProcessingException if an error occurred during the processing of TIFF data that could not be
* ignored or recovered from
* @throws IOException an error occurred while accessing the required data
*/
public void processTiff(@NotNull final RandomAccessReader reader,
@NotNull final TiffHandler handler,
final int tiffHeaderOffset) throws TiffProcessingException, IOException
{
// This must be either "MM" or "II".
short byteOrderIdentifier = reader.getInt16(tiffHeaderOffset);
if (byteOrderIdentifier == 0x4d4d) { // "MM"
reader.setMotorolaByteOrder(true);
} else if (byteOrderIdentifier == 0x4949) { // "II"
reader.setMotorolaByteOrder(false);
} else {
throw new TiffProcessingException("Unclear distinction between Motorola/Intel byte ordering: " + byteOrderIdentifier);
}
// Check the next two values for correctness.
final int tiffMarker = reader.getUInt16(2 + tiffHeaderOffset);
handler.setTiffMarker(tiffMarker);
int firstIfdOffset = reader.getInt32(4 + tiffHeaderOffset) + tiffHeaderOffset;
// David Ekholm sent a digital camera image that has this problem
// TODO getLength should be avoided as it causes RandomAccessStreamReader to read to the end of the stream
if (firstIfdOffset >= reader.getLength() - 1) {
handler.warn("First IFD offset is beyond the end of the TIFF data segment -- trying default offset");
// First directory normally starts immediately after the offset bytes, so try that
firstIfdOffset = tiffHeaderOffset + 2 + 2 + 4;
}
Set<Integer> processedIfdOffsets = new HashSet<Integer>();
processIfd(handler, reader, processedIfdOffsets, firstIfdOffset, tiffHeaderOffset);
}
/**
* Processes a TIFF IFD.
*
* IFD Header:
* <ul>
* <li><b>2 bytes</b> number of tags</li>
* </ul>
* Tag structure:
* <ul>
* <li><b>2 bytes</b> tag type</li>
* <li><b>2 bytes</b> format code (values 1 to 12, inclusive)</li>
* <li><b>4 bytes</b> component count</li>
* <li><b>4 bytes</b> inline value, or offset pointer if too large to fit in four bytes</li>
* </ul>
*
*
* @param handler the {@link com.drew.imaging.tiff.TiffHandler} that will coordinate processing and accept read values
* @param reader the {@link com.drew.lang.RandomAccessReader} from which the data should be read
* @param processedIfdOffsets the set of visited IFD offsets, to avoid revisiting the same IFD in an endless loop
* @param ifdOffset the offset within <code>reader</code> at which the IFD data starts
* @param tiffHeaderOffset the offset within <code>reader</code> at which the TIFF header starts
* @throws IOException an error occurred while accessing the required data
*/
public static void processIfd(@NotNull final TiffHandler handler,
@NotNull final RandomAccessReader reader,
@NotNull final Set<Integer> processedIfdOffsets,
final int ifdOffset,
final int tiffHeaderOffset) throws IOException
{
Boolean resetByteOrder = null;
try {
// check for directories we've already visited to avoid stack overflows when recursive/cyclic directory structures exist
if (processedIfdOffsets.contains(Integer.valueOf(ifdOffset))) {
return;
}
// remember that we've visited this directory so that we don't visit it again later
processedIfdOffsets.add(ifdOffset);
if (ifdOffset >= reader.getLength() || ifdOffset < 0) {
handler.error("Ignored IFD marked to start outside data segment");
return;
}
// First two bytes in the IFD are the number of tags in this directory
int dirTagCount = reader.getUInt16(ifdOffset);
// Some software modifies the byte order of the file, but misses some IFDs (such as makernotes).
// The entire test image repository doesn't contain a single IFD with more than 255 entries.
// Here we detect switched bytes that suggest this problem, and temporarily swap the byte order.
// This was discussed in GitHub issue #136.
if (dirTagCount > 0xFF && (dirTagCount & 0xFF) == 0) {
resetByteOrder = reader.isMotorolaByteOrder();
dirTagCount >>= 8;
reader.setMotorolaByteOrder(!reader.isMotorolaByteOrder());
}
int dirLength = (2 + (12 * dirTagCount) + 4);
if (dirLength + ifdOffset > reader.getLength()) {
handler.error("Illegally sized IFD");
return;
}
//
// Handle each tag in this directory
//
int invalidTiffFormatCodeCount = 0;
for (int tagNumber = 0; tagNumber < dirTagCount; tagNumber++) {
final int tagOffset = calculateTagOffset(ifdOffset, tagNumber);
// 2 bytes for the tag id
final int tagId = reader.getUInt16(tagOffset);
// 2 bytes for the format code
final int formatCode = reader.getUInt16(tagOffset + 2);
final TiffDataFormat format = TiffDataFormat.fromTiffFormatCode(formatCode);
// 4 bytes dictate the number of components in this tag's data
final long componentCount = reader.getUInt32(tagOffset + 4);
final long byteCount;
if (format == null) {
Long byteCountOverride = handler.tryCustomProcessFormat(tagId, formatCode, componentCount);
if (byteCountOverride == null) {
// This error suggests that we are processing at an incorrect index and will generate
// rubbish until we go out of bounds (which may be a while). Exit now.
handler.error(String.format("Invalid TIFF tag format code %d for tag 0x%04X", formatCode, tagId));
// TODO specify threshold as a parameter, or provide some other external control over this behaviour
if (++invalidTiffFormatCodeCount > 5) {
handler.error("Stopping processing as too many errors seen in TIFF IFD");
return;
}
continue;
}
byteCount = byteCountOverride;
} else {
byteCount = componentCount * format.getComponentSizeBytes();
}
final long tagValueOffset;
if (byteCount > 4) {
// If it's bigger than 4 bytes, the dir entry contains an offset.
final long offsetVal = reader.getUInt32(tagOffset + 8);
if (offsetVal + byteCount > reader.getLength()) {
// Bogus pointer offset and / or byteCount value
handler.error("Illegal TIFF tag pointer offset");
continue;
}
tagValueOffset = tiffHeaderOffset + offsetVal;
} else {
// 4 bytes or less and value is in the dir entry itself.
tagValueOffset = tagOffset + 8;
}
if (tagValueOffset < 0 || tagValueOffset > reader.getLength()) {
handler.error("Illegal TIFF tag pointer offset");
continue;
}
// Check that this tag isn't going to allocate outside the bounds of the data array.
// This addresses an uncommon OutOfMemoryError.
if (byteCount < 0 || tagValueOffset + byteCount > reader.getLength()) {
handler.error("Illegal number of bytes for TIFF tag data: " + byteCount);
continue;
}
// Some tags point to one or more additional IFDs to process
boolean isIfdPointer = false;
if (byteCount == 4 * componentCount) {
for (int i = 0; i < componentCount; i++) {
if (handler.tryEnterSubIfd(tagId)) {
isIfdPointer = true;
int subDirOffset = tiffHeaderOffset + reader.getInt32((int) (tagValueOffset + i * 4));
processIfd(handler, reader, processedIfdOffsets, subDirOffset, tiffHeaderOffset);
}
}
}
// If it wasn't an IFD pointer, allow custom tag processing to occur
if (!isIfdPointer && !handler.customProcessTag((int) tagValueOffset, processedIfdOffsets, tiffHeaderOffset, reader, tagId, (int) byteCount)) {
// If no custom processing occurred, process the tag in the standard fashion
processTag(handler, tagId, (int) tagValueOffset, (int) componentCount, formatCode, reader);
}
}
// at the end of each IFD is an optional link to the next IFD
final int finalTagOffset = calculateTagOffset(ifdOffset, dirTagCount);
int nextIfdOffset = reader.getInt32(finalTagOffset);
if (nextIfdOffset != 0) {
nextIfdOffset += tiffHeaderOffset;
if (nextIfdOffset >= reader.getLength()) {
// Last 4 bytes of IFD reference another IFD with an address that is out of bounds
// Note this could have been caused by jhead 1.3 cropping too much
return;
} else if (nextIfdOffset < ifdOffset) {
// TODO is this a valid restriction?
// Last 4 bytes of IFD reference another IFD with an address that is before the start of this directory
return;
}
if (handler.hasFollowerIfd()) {
processIfd(handler, reader, processedIfdOffsets, nextIfdOffset, tiffHeaderOffset);
}
}
} finally {
handler.endingIFD();
if (resetByteOrder != null)
reader.setMotorolaByteOrder(resetByteOrder);
}
}
private static void processTag(@NotNull final TiffHandler handler,
final int tagId,
final int tagValueOffset,
final int componentCount,
final int formatCode,
@NotNull final RandomAccessReader reader) throws IOException
{
switch (formatCode) {
case TiffDataFormat.CODE_UNDEFINED:
// this includes exif user comments
handler.setByteArray(tagId, reader.getBytes(tagValueOffset, componentCount));
break;
case TiffDataFormat.CODE_STRING:
handler.setString(tagId, reader.getNullTerminatedStringValue(tagValueOffset, componentCount, null));
break;
case TiffDataFormat.CODE_RATIONAL_S:
if (componentCount == 1) {
handler.setRational(tagId, new Rational(reader.getInt32(tagValueOffset), reader.getInt32(tagValueOffset + 4)));
} else if (componentCount > 1) {
Rational[] array = new Rational[componentCount];
for (int i = 0; i < componentCount; i++)
array[i] = new Rational(reader.getInt32(tagValueOffset + (8 * i)), reader.getInt32(tagValueOffset + 4 + (8 * i)));
handler.setRationalArray(tagId, array);
}
break;
case TiffDataFormat.CODE_RATIONAL_U:
if (componentCount == 1) {
handler.setRational(tagId, new Rational(reader.getUInt32(tagValueOffset), reader.getUInt32(tagValueOffset + 4)));
} else if (componentCount > 1) {
Rational[] array = new Rational[componentCount];
for (int i = 0; i < componentCount; i++)
array[i] = new Rational(reader.getUInt32(tagValueOffset + (8 * i)), reader.getUInt32(tagValueOffset + 4 + (8 * i)));
handler.setRationalArray(tagId, array);
}
break;
case TiffDataFormat.CODE_SINGLE:
if (componentCount == 1) {
handler.setFloat(tagId, reader.getFloat32(tagValueOffset));
} else {
float[] array = new float[componentCount];
for (int i = 0; i < componentCount; i++)
array[i] = reader.getFloat32(tagValueOffset + (i * 4));
handler.setFloatArray(tagId, array);
}
break;
case TiffDataFormat.CODE_DOUBLE:
if (componentCount == 1) {
handler.setDouble(tagId, reader.getDouble64(tagValueOffset));
} else {
double[] array = new double[componentCount];
for (int i = 0; i < componentCount; i++)
array[i] = reader.getDouble64(tagValueOffset + (i * 4));
handler.setDoubleArray(tagId, array);
}
break;
case TiffDataFormat.CODE_INT8_S:
if (componentCount == 1) {
handler.setInt8s(tagId, reader.getInt8(tagValueOffset));
} else {
byte[] array = new byte[componentCount];
for (int i = 0; i < componentCount; i++)
array[i] = reader.getInt8(tagValueOffset + i);
handler.setInt8sArray(tagId, array);
}
break;
case TiffDataFormat.CODE_INT8_U:
if (componentCount == 1) {
handler.setInt8u(tagId, reader.getUInt8(tagValueOffset));
} else {
short[] array = new short[componentCount];
for (int i = 0; i < componentCount; i++)
array[i] = reader.getUInt8(tagValueOffset + i);
handler.setInt8uArray(tagId, array);
}
break;
case TiffDataFormat.CODE_INT16_S:
if (componentCount == 1) {
handler.setInt16s(tagId, (int)reader.getInt16(tagValueOffset));
} else {
short[] array = new short[componentCount];
for (int i = 0; i < componentCount; i++)
array[i] = reader.getInt16(tagValueOffset + (i * 2));
handler.setInt16sArray(tagId, array);
}
break;
case TiffDataFormat.CODE_INT16_U:
if (componentCount == 1) {
handler.setInt16u(tagId, reader.getUInt16(tagValueOffset));
} else {
int[] array = new int[componentCount];
for (int i = 0; i < componentCount; i++)
array[i] = reader.getUInt16(tagValueOffset + (i * 2));
handler.setInt16uArray(tagId, array);
}
break;
case TiffDataFormat.CODE_INT32_S:
// NOTE 'long' in this case means 32 bit, not 64
if (componentCount == 1) {
handler.setInt32s(tagId, reader.getInt32(tagValueOffset));
} else {
int[] array = new int[componentCount];
for (int i = 0; i < componentCount; i++)
array[i] = reader.getInt32(tagValueOffset + (i * 4));
handler.setInt32sArray(tagId, array);
}
break;
case TiffDataFormat.CODE_INT32_U:
// NOTE 'long' in this case means 32 bit, not 64
if (componentCount == 1) {
handler.setInt32u(tagId, reader.getUInt32(tagValueOffset));
} else {
long[] array = new long[componentCount];
for (int i = 0; i < componentCount; i++)
array[i] = reader.getUInt32(tagValueOffset + (i * 4));
handler.setInt32uArray(tagId, array);
}
break;
default:
handler.error(String.format("Invalid TIFF tag format code %d for tag 0x%04X", formatCode, tagId));
}
}
/**
* Determine the offset of a given tag within the specified IFD.
*
* @param ifdStartOffset the offset at which the IFD starts
* @param entryNumber the zero-based entry number
*/
private static int calculateTagOffset(int ifdStartOffset, int entryNumber)
{
// Add 2 bytes for the tag count.
// Each entry is 12 bytes.
return ifdStartOffset + 2 + (12 * entryNumber);
}
}
|