"""Locate the EXIF (or EXIF-like) header inside a JPEG byte stream."""
from typing import BinaryIO
from exifread.utils import ord_
from exifread.exif_log import get_logger
from exifread.exceptions import InvalidExif
logger = get_logger()
def _increment_base(data, base):
    """Return how far to advance ``base`` to step over the segment found there.

    The two bytes at ``base + 2`` and ``base + 3`` are combined as a
    big-endian 16-bit value and 2 is added to the result (presumably to
    account for the two marker bytes that precede the length field).
    """
    high_byte = ord_(data[base + 2])
    low_byte = ord_(data[base + 3])
    return high_byte * 256 + low_byte + 2
def _get_initial_base(fh: BinaryIO, data, fake_exif) -> tuple:
    """Skip leading JFIF/JFXX/OLYM/Phot segments and compute the first base.

    While the header held in ``data`` starts with 0xFF at index 2 and carries
    one of the known codes at ``data[6:10]``, the segment is consumed from
    ``fh`` and a replacement 12-byte header is built from the next 10 bytes
    of the stream.

    :param fh: binary stream; it is read from (advanced) for every skipped segment.
    :param data: the header bytes already read from ``fh``.
    :param fake_exif: passed through unchanged unless a segment is skipped,
        in which case it becomes ``1``.
    :return: ``(base, fake_exif)``; ``base`` stays ``2`` when nothing is skipped.
    """
    skippable_codes = (b"JFIF", b"JFXX", b"OLYM", b"Phot")
    base = 2
    logger.debug("data[2]=0x%X data[3]=0x%X data[6:10]=%s", ord_(data[2]), ord_(data[3]), data[6:10])

    while True:
        if ord_(data[2]) != 0xFF or data[6:10] not in skippable_codes:
            break

        size_high = ord_(data[4])
        size_low = ord_(data[5])
        segment_length = size_high * 256 + size_low
        logger.debug(" Length offset is %s", segment_length)

        # Discard the remainder of this segment.
        fh.read(segment_length - 8)

        # fake an EXIF beginning of file
        # I don't think this is used. --gd
        data = b"\xFF\x00" + fh.read(10)
        fake_exif = 1

        if base <= 2:
            logger.debug(" Added to zero")
            base = segment_length + 4
        else:
            logger.debug(" Added to base")
            base += segment_length + 2
        logger.debug(" Set segment base to 0x%X", base)

    return base, fake_exif
def _get_base(base, data) -> int:
    """Walk the JPEG segments in ``data`` and return the base to parse from.

    Starting at ``base``, the two-byte marker at the current offset is
    inspected and the segment is stepped over using its length field. The
    walk stops when either

    * an APP1 (0xFFE1) segment whose code is ``b"Exif"`` is found, in which
      case the returned base is that marker's offset minus 2, or
    * a 0xFFDB marker is found, in which case its offset is returned as-is.

    :param base: offset into ``data`` at which the first marker is expected.
    :param data: bytes read from the start of the file.
    :return: the adjusted base offset.
    :raises InvalidExif: if the walk needs a byte beyond the end of ``data``,
        whichever segment type it was inspecting at the time.
    """
    # pylint: disable=too-many-statements
    # The whole walk is guarded: every branch indexes data[base + 2] and
    # data[base + 3], so a truncated header can overrun in any of them, not
    # only in the fallback branch.
    try:
        while True:
            logger.debug(" Segment base 0x%X", base)
            marker = data[base : base + 2]
            if marker == b"\xFF\xE1":
                # APP1
                logger.debug(" APP1 at base 0x%X", base)
                logger.debug(" Length: 0x%X 0x%X", ord_(data[base + 2]), ord_(data[base + 3]))
                logger.debug(" Code: %s", data[base + 4 : base + 8])
                if data[base + 4 : base + 8] == b"Exif":
                    logger.debug(" Decrement base by 2 to get to pre-segment header (for compatibility with later code)")
                    base -= 2
                    break
                increment = _increment_base(data, base)
                logger.debug(" Increment base by %s", increment)
                base += increment
            elif marker == b"\xFF\xE0":
                # APP0
                logger.debug(" APP0 at base 0x%X", base)
                logger.debug(" Length: 0x%X 0x%X", ord_(data[base + 2]), ord_(data[base + 3]))
                logger.debug(" Code: %s", data[base + 4 : base + 8])
                increment = _increment_base(data, base)
                logger.debug(" Increment base by %s", increment)
                base += increment
            elif marker == b"\xFF\xE2":
                # APP2
                logger.debug(" APP2 at base 0x%X", base)
                logger.debug(" Length: 0x%X 0x%X", ord_(data[base + 2]), ord_(data[base + 3]))
                logger.debug(" Code: %s", data[base + 4 : base + 8])
                increment = _increment_base(data, base)
                logger.debug(" Increment base by %s", increment)
                base += increment
            elif marker == b"\xFF\xEE":
                # APP14
                logger.debug(" APP14 Adobe segment at base 0x%X", base)
                logger.debug(" Length: 0x%X 0x%X", ord_(data[base + 2]), ord_(data[base + 3]))
                logger.debug(" Code: %s", data[base + 4 : base + 8])
                increment = _increment_base(data, base)
                logger.debug(" Increment base by %s", increment)
                base += increment
                logger.debug(" There is useful EXIF-like data here, but we have no parser for it.")
            elif marker == b"\xFF\xDB":
                # 0xFFDB is the DQT (quantization table) marker; the walk
                # treats it as the point where metadata segments end.
                logger.debug(" JPEG image data at base 0x%X No more segments are expected.", base)
                break
            elif marker == b"\xFF\xD8":
                # 0xFFD8 is the SOI (start of image) marker; it is stepped
                # over like a length-prefixed segment, as before.
                logger.debug(" FFD8 segment at base 0x%X", base)
                logger.debug(
                    " Got 0x%X 0x%X and %s instead", ord_(data[base]), ord_(data[base + 1]), data[4 + base : 10 + base]
                )
                logger.debug(" Length: 0x%X 0x%X", ord_(data[base + 2]), ord_(data[base + 3]))
                logger.debug(" Code: %s", data[base + 4 : base + 8])
                increment = _increment_base(data, base)
                logger.debug(" Increment base by %s", increment)
                base += increment
            elif marker == b"\xFF\xEC":
                # APP12
                logger.debug(" APP12 XMP (Ducky) or Pictureinfo segment at base 0x%X", base)
                logger.debug(" Got 0x%X and 0x%X instead", ord_(data[base]), ord_(data[base + 1]))
                logger.debug(" Length: 0x%X 0x%X", ord_(data[base + 2]), ord_(data[base + 3]))
                logger.debug("Code: %s", data[base + 4 : base + 8])
                increment = _increment_base(data, base)
                logger.debug(" Increment base by %s", increment)
                base += increment
                logger.debug(
                    " There is useful EXIF-like data here (quality, comment, copyright), " "but we have no parser for it."
                )
            else:
                # Unknown marker: still try to step over it by its length field.
                increment = _increment_base(data, base)
                logger.debug(" Got 0x%X and 0x%X instead", ord_(data[base]), ord_(data[base + 1]))
                logger.debug(" Increment base by %s", increment)
                base += increment
    except IndexError as err:
        raise InvalidExif("Unexpected/unhandled segment type or file content.") from err
    return base
def find_jpeg_exif(fh: BinaryIO, data, fake_exif) -> tuple:
    """Find the EXIF (or EXIF-like) header of a JPEG stream.

    The stream is rewound and re-read from the start, the segments are walked
    to find the base offset, and the header at that base is checked for an
    ``Exif``, ``Ducky`` or ``Adobe`` code.

    :param fh: seekable binary stream positioned after the initial header read.
    :param data: the header bytes already read from ``fh``.
    :param fake_exif: flag forwarded to, and possibly updated by,
        ``_get_initial_base``.
    :return: ``(offset, endian, fake_exif)`` where ``offset`` is the stream
        position ``base + 12`` and ``endian`` is the single byte read there.
    :raises InvalidExif: if no recognised header is present at the computed
        base, or if the buffer ends before the header byte.
    """
    logger.debug("JPEG format recognized data[0:2]=0x%X%X", ord_(data[0]), ord_(data[1]))

    base, fake_exif = _get_initial_base(fh, data, fake_exif)

    # Big ugly patch to deal with APP2 (or other) data coming before APP1
    fh.seek(0)
    # in theory, this could be insufficient since 64K is the maximum size--gd
    data = fh.read(base + 4000)

    base = _get_base(base, data)

    # Position the stream where the byte-order byte is expected before
    # validating the header, exactly as the callers have always observed.
    fh.seek(base + 12)

    try:
        lead_byte = ord_(data[base + 2])
    except IndexError as err:
        # The segment walk can stop on a marker that sits in the last two
        # bytes of the buffer; report that as invalid EXIF, not a crash.
        raise InvalidExif("Unexpected/unhandled segment type or file content.") from err

    code4 = data[base + 6 : base + 10]
    code5 = data[base + 6 : base + 11]

    is_exif = lead_byte == 0xFF and code4 == b"Exif"
    # Ducky (APP12) and Adobe (APP14) headers are accepted as EXIF-like.
    is_exif_like = lead_byte == 0xFF and code5 in (b"Ducky", b"Adobe")

    if not (is_exif or is_exif_like):
        # no EXIF information
        msg = (
            "No EXIF header expected data[2+base]==0xFF and data[6+base:10+base]===Exif (or Duck) "
            "Did get 0x%X and %s" % (lead_byte, code5)
        )
        raise InvalidExif(msg)

    if is_exif_like:
        logger.debug(
            "EXIF-like header (normally 0xFF and code): 0x%X and %s",
            lead_byte,
            code5,
        )

    offset = fh.tell()
    endian = fh.read(1)
    return offset, endian, fake_exif
|