File: jpeg.py

package info (click to toggle)
python-exif 3.0.0-1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, forky, sid, trixie
  • size: 284 kB
  • sloc: python: 2,969; makefile: 47
file content (155 lines) | stat: -rw-r--r-- 6,879 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
from typing import BinaryIO

from exifread.utils import ord_
from exifread.exif_log import get_logger
from exifread.exceptions import InvalidExif

# Module-level logger obtained from exifread.exif_log.get_logger(); the
# functions in this module use it for debug tracing of the segment scan.
logger = get_logger()


def _increment_base(data, base):
    """Return the distance from ``base`` to the start of the following segment.

    The items at ``data[base + 2]`` and ``data[base + 3]`` are each passed
    through ``ord_`` and combined as a big-endian 16-bit value (high item
    first); 2 is then added to that value.

    Raises:
        IndexError: if ``data`` has no item at ``base + 2`` or ``base + 3``.
    """
    high_byte = ord_(data[base + 2])
    low_byte = ord_(data[base + 3])
    segment_length = high_byte * 256 + low_byte
    return segment_length + 2


def _get_initial_base(fh: BinaryIO, data, fake_exif) -> tuple:
    """Skip leading JFIF/JFXX/OLYM/Phot segments and compute the starting base.

    While ``data[2]`` is 0xFF and ``data[6:10]`` is one of ``b"JFIF"``,
    ``b"JFXX"``, ``b"OLYM"`` or ``b"Phot"``, the two-byte length at
    ``data[4:6]`` is decoded, ``length - 8`` bytes are consumed from ``fh``,
    and ``data`` is replaced by ``b"\\xFF\\x00"`` followed by the next 10 bytes
    of ``fh``.

    Args:
        fh: Binary file object to read from; reads are relative to its current
            position (NOTE(review): the caller presumably has already consumed
            the bytes held in ``data`` - confirm against the caller).
        data: Header bytes to inspect.
        fake_exif: Flag value passed through; replaced by ``1`` as soon as one
            segment has been skipped.

    Returns:
        ``(base, fake_exif)``. ``base`` is 2 when nothing was skipped,
        ``length + 4`` after the first skipped segment, and grows by
        ``length + 2`` for each further one.

    Truncated or malformed input ends the scan instead of raising
    ``IndexError``/``ValueError``; later stages decide whether the file is
    usable.
    """
    skippable_codes = (b"JFIF", b"JFXX", b"OLYM", b"Phot")
    base = 2
    # Only trace the header bytes when they exist; indexing a header shorter
    # than four bytes would raise IndexError before any parsing happened.
    if len(data) >= 4:
        logger.debug("data[2]=0x%X data[3]=0x%X data[6:10]=%s", ord_(data[2]), ord_(data[3]), data[6:10])
    # The slice test comes first on purpose: slicing never raises, and a match
    # guarantees at least 10 bytes, so data[2], data[4] and data[5] are safe.
    # This matters once fh.read(10) below hits end-of-file and returns less.
    while data[6:10] in skippable_codes and ord_(data[2]) == 0xFF:
        length = ord_(data[4]) * 256 + ord_(data[5])
        logger.debug(" Length offset is %s", length)
        # A bogus length below 8 would yield a negative size, which means
        # "read to EOF" or raises ValueError depending on the stream type.
        fh.read(max(length - 8, 0))
        # fake an EXIF beginning of file
        # I don't think this is used. --gd
        data = b"\xFF\x00" + fh.read(10)
        fake_exif = 1
        if base > 2:
            logger.debug(" Added to base")
            base = base + length + 4 - 2
        else:
            logger.debug(" Added to zero")
            base = length + 4
        logger.debug(" Set segment base to 0x%X", base)
    return base, fake_exif


def _get_base(base, data) -> int:
    """Walk the JPEG segments in ``data`` from ``base`` and return the stop offset.

    The two bytes at ``data[base:base + 2]`` are examined repeatedly:

    * ``FF E1`` (APP1) whose bytes ``base + 4 .. base + 8`` are ``b"Exif"``:
      ``base`` is decremented by 2 and the walk stops.
    * ``FF DB``: the walk stops with ``base`` unchanged.
    * anything else (including APP0, APP2, APP12, APP14, ``FF D8`` and
      unrecognised markers): ``base`` is advanced by ``_increment_base``.

    Args:
        base: Offset into ``data`` at which to start.
        data: Buffer holding the beginning of the file.

    Returns:
        The offset at which the walk stopped.

    Raises:
        InvalidExif: if ``data`` ends before a segment header can be read,
            whichever branch is being processed.
    """
    # pylint: disable=too-many-statements
    try:
        while True:
            logger.debug(" Segment base 0x%X", base)
            marker = data[base : base + 2]
            if marker == b"\xFF\xE1":
                # APP1
                logger.debug("  APP1 at base 0x%X", base)
                logger.debug("  Length: 0x%X 0x%X", ord_(data[base + 2]), ord_(data[base + 3]))
                logger.debug("  Code: %s", data[base + 4 : base + 8])
                if data[base + 4 : base + 8] == b"Exif":
                    logger.debug(
                        "  Decrement base by 2 to get to pre-segment header (for compatibility with later code)"
                    )
                    base -= 2
                    break
                increment = _increment_base(data, base)
                logger.debug(" Increment base by %s", increment)
                base += increment
            elif marker == b"\xFF\xE0":
                # APP0
                logger.debug("  APP0 at base 0x%X", base)
                logger.debug("  Length: 0x%X 0x%X", ord_(data[base + 2]), ord_(data[base + 3]))
                logger.debug("  Code: %s", data[base + 4 : base + 8])
                increment = _increment_base(data, base)
                logger.debug(" Increment base by %s", increment)
                base += increment
            elif marker == b"\xFF\xE2":
                # APP2
                logger.debug("  APP2 at base 0x%X", base)
                logger.debug("  Length: 0x%X 0x%X", ord_(data[base + 2]), ord_(data[base + 3]))
                logger.debug(" Code: %s", data[base + 4 : base + 8])
                increment = _increment_base(data, base)
                logger.debug(" Increment base by %s", increment)
                base += increment
            elif marker == b"\xFF\xEE":
                # APP14
                logger.debug("  APP14 Adobe segment at base 0x%X", base)
                logger.debug("  Length: 0x%X 0x%X", ord_(data[base + 2]), ord_(data[base + 3]))
                logger.debug("  Code: %s", data[base + 4 : base + 8])
                increment = _increment_base(data, base)
                logger.debug(" Increment base by %s", increment)
                base += increment
                logger.debug("  There is useful EXIF-like data here, but we have no parser for it.")
            elif marker == b"\xFF\xDB":
                logger.debug("  JPEG image data at base 0x%X No more segments are expected.", base)
                break
            elif marker == b"\xFF\xD8":
                # 0xFFD8 marker found mid-stream; it is stepped over like any
                # other length-prefixed segment.
                logger.debug("  FFD8 segment at base 0x%X", base)
                logger.debug(
                    "  Got 0x%X 0x%X and %s instead", ord_(data[base]), ord_(data[base + 1]), data[4 + base : 10 + base]
                )
                logger.debug("  Length: 0x%X 0x%X", ord_(data[base + 2]), ord_(data[base + 3]))
                logger.debug("  Code: %s", data[base + 4 : base + 8])
                increment = _increment_base(data, base)
                logger.debug("  Increment base by %s", increment)
                base += increment
            elif marker == b"\xFF\xEC":
                # APP12
                logger.debug("  APP12 XMP (Ducky) or Pictureinfo segment at base 0x%X", base)
                logger.debug("  Got 0x%X and 0x%X instead", ord_(data[base]), ord_(data[base + 1]))
                logger.debug("  Length: 0x%X 0x%X", ord_(data[base + 2]), ord_(data[base + 3]))
                logger.debug("Code: %s", data[base + 4 : base + 8])
                increment = _increment_base(data, base)
                logger.debug("  Increment base by %s", increment)
                base += increment
                logger.debug(
                    "  There is useful EXIF-like data here (quality, comment, copyright), "
                    "but we have no parser for it."
                )
            else:
                # Unrecognised marker: step over it using its length field.
                increment = _increment_base(data, base)
                logger.debug("  Got 0x%X and 0x%X instead", ord_(data[base]), ord_(data[base + 1]))
                logger.debug("  Increment base by %s", increment)
                base += increment
    except IndexError as err:
        # Every branch indexes data[base + 2] / data[base + 3]; running off the
        # end of the buffer means the file is truncated or not a real JPEG.
        # Report that uniformly rather than leaking a raw IndexError from the
        # known-marker branches.
        raise InvalidExif("Unexpected/unhandled segment type or file content.") from err
    return base


def find_jpeg_exif(fh: BinaryIO, data, fake_exif) -> tuple:
    """Locate the EXIF (or EXIF-like Ducky/Adobe) header in a JPEG stream.

    Args:
        fh: Binary, seekable file object for the JPEG. It is read from,
            rewound with ``seek(0)``, and finally left positioned just after
            the returned endian byte.
        data: Leading bytes of the file; at least the first two are indexed
            here for the debug trace.
        fake_exif: Flag passed to ``_get_initial_base`` and returned as that
            helper reports it.

    Returns:
        ``(offset, endian, fake_exif)`` where ``offset`` is ``fh.tell()`` after
        seeking to ``base + 12`` and ``endian`` is the single byte read there.

    Raises:
        InvalidExif: if no Exif, Ducky or Adobe header is found at the located
            segment, or the buffered data ends before that header.
    """
    logger.debug("JPEG format recognized data[0:2]=0x%X%X", ord_(data[0]), ord_(data[1]))

    base, fake_exif = _get_initial_base(fh, data, fake_exif)

    # Big ugly patch to deal with APP2 (or other) data coming before APP1
    fh.seek(0)
    # in theory, this could be insufficient since 64K is the maximum size--gd
    data = fh.read(base + 4000)

    base = _get_base(base, data)

    fh.seek(base + 12)

    if len(data) <= 2 + base:
        # The segment walk stopped at the very end of the buffer; indexing
        # data[2 + base] would raise IndexError, so report it as invalid EXIF.
        raise InvalidExif("No EXIF header: data ended before the expected segment header")

    lead = ord_(data[2 + base])
    # Five bytes are taken so that b"Ducky" and b"Adobe" can be matched; the
    # Exif test only looks at the first four of them.
    code = data[6 + base : 10 + base + 1]
    is_exif = code[:4] == b"Exif"

    if lead != 0xFF or not (is_exif or code in (b"Ducky", b"Adobe")):
        # no EXIF information
        msg = "No EXIF header expected data[2+base]==0xFF and data[6+base:10+base]===Exif (or Duck)"
        msg += " Did get 0x%X and %s" % (lead, code)
        raise InvalidExif(msg)

    if not is_exif:
        # detected Ducky header or APP14 (Adobe)
        logger.debug(
            "EXIF-like header (normally 0xFF and code): 0x%X and %s",
            lead,
            code,
        )

    # All accepted header kinds are handled the same way from here on.
    offset = fh.tell()
    endian = fh.read(1)
    return offset, endian, fake_exif