File: pyimod01_archive.py

package info (click to toggle)
pyinstaller 6.18.0%2Bds-2
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 11,824 kB
  • sloc: python: 41,828; ansic: 12,123; makefile: 171; sh: 131; xml: 19
file content (140 lines) | stat: -rw-r--r-- 5,518 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
#-----------------------------------------------------------------------------
# Copyright (c) 2005-2023, PyInstaller Development Team.
#
# Distributed under the terms of the GNU General Public License (version 2
# or later) with exception for distributing the bootloader.
#
# The full license is in the file COPYING.txt, distributed with this software.
#
# SPDX-License-Identifier: (GPL-2.0-or-later WITH Bootloader-exception)
#-----------------------------------------------------------------------------

# **NOTE** This module is used during bootstrap.
# Import *ONLY* builtin modules or modules that are collected into the base_library.zip archive.
# List of built-in modules: sys.builtin_module_names
# List of modules collected into base_library.zip: PyInstaller.compat.PY3_BASE_MODULES

import os
import struct
import marshal
import zlib

# In Python3, the MAGIC_NUMBER value is available in the importlib module. However, in the bootstrap phase we cannot use
# importlib directly, but rather its frozen variant.
import _frozen_importlib

# Bytecode magic number of the running interpreter, taken from the frozen importlib bootstrap module.
PYTHON_MAGIC_NUMBER = _frozen_importlib._bootstrap_external.MAGIC_NUMBER

# Type codes of PYZ archive entries (consecutive integers, starting at 0).
(
    PYZ_ITEM_MODULE,
    PYZ_ITEM_PKG,
    PYZ_ITEM_DATA,  # deprecated; PYZ does not contain any data entries anymore
    PYZ_ITEM_NSPKG,  # PEP-420 namespace package
) = range(4)


class ArchiveReadError(RuntimeError):
    """
    Error raised when a PYZ archive cannot be read, for example when one of its magic patterns does not match.
    """


class ZlibArchiveReader:
    """
    Reader for PyInstaller's PYZ (ZlibArchive) archive. The archive is used to store collected byte-compiled Python
    modules, as individually-compressed entries.

    After construction, `toc` maps entry names to `(typecode, offset, length)` tuples, where the offset is relative
    to the start of the archive. The archive file is not kept open; it is re-opened for every read.
    """
    _PYZ_MAGIC_PATTERN = b'PYZ\0'

    def __init__(self, filename, start_offset=None, check_pymagic=False):
        """
        Open the archive, validate its header, and load its table of contents (TOC).

        :param filename: path to the file that contains the archive. If `start_offset` is None, the path may carry
            the offset as a `?offset` suffix (`/path/to/file?offset`).
        :param start_offset: offset of the archive within the file, or None to infer it from `filename` (defaulting
            to 0 when the filename carries no offset).
        :param check_pymagic: if True, require the python magic number stored in the archive to match the one of
            the running interpreter.
        :raises ArchiveReadError: if a magic pattern does not match, the header is truncated, or the TOC cannot be
            loaded.
        """
        self._filename = filename
        self._start_offset = start_offset

        self.toc = {}

        # If no offset is given, try inferring it from filename
        if start_offset is None:
            self._filename, self._start_offset = self._parse_offset_from_filename(filename)

        # Parse header and load TOC. Standard header contains 12 bytes: PYZ magic pattern, python bytecode magic
        # pattern, and offset to TOC (32-bit integer). It might be followed by additional fields, depending on
        # implementation version.
        with open(self._filename, "rb") as fp:
            # Read PYZ magic pattern, located at the start of the file
            fp.seek(self._start_offset, os.SEEK_SET)

            magic = fp.read(len(self._PYZ_MAGIC_PATTERN))
            if magic != self._PYZ_MAGIC_PATTERN:
                raise ArchiveReadError("PYZ magic pattern mismatch!")

            # Read python magic/version number
            pymagic = fp.read(len(PYTHON_MAGIC_NUMBER))
            if check_pymagic and pymagic != PYTHON_MAGIC_NUMBER:
                raise ArchiveReadError("Python magic pattern mismatch!")

            # Read TOC offset. A short read means that the file ends in the middle of the header; report that as an
            # archive error instead of leaking struct.error to the caller.
            try:
                (toc_offset,) = struct.unpack('!i', fp.read(4))
            except struct.error as e:
                raise ArchiveReadError("PYZ header is truncated!") from e

            # Load TOC. marshal.load() signals invalid data with EOFError, ValueError, or TypeError, and dict()
            # raises ValueError/TypeError if the unmarshaled object is not a sequence of (name, entry) pairs.
            fp.seek(self._start_offset + toc_offset, os.SEEK_SET)
            try:
                self.toc = dict(marshal.load(fp))
            except (EOFError, ValueError, TypeError) as e:
                raise ArchiveReadError("Failed to load PYZ table of contents!") from e

    @staticmethod
    def _parse_offset_from_filename(filename):
        """
        Parse the numeric offset from filename, stored as: `/path/to/file?offset`.

        Returns a `(filename, offset)` tuple. If the filename carries no valid offset, it is returned unchanged,
        together with offset 0.
        """
        offset = 0

        idx = filename.rfind('?')
        if idx == -1:
            return filename, offset

        try:
            offset = int(filename[idx + 1:])
            filename = filename[:idx]  # Remove the offset from filename
        except ValueError:
            # Ignore spurious "?" in the path (for example, like in Windows UNC \\?\<path>).
            pass

        return filename, offset

    def extract(self, name, raw=False):
        """
        Extract data from entry with the given name.

        If the entry belongs to a module or a package, the data is loaded (unmarshaled) into code object. To retrieve
        raw data, set `raw` flag to True.

        :param name: name of the entry, as stored in the TOC.
        :param raw: if True, return the decompressed data as-is, without unmarshaling it.
        :returns: the unmarshaled object or the decompressed bytes; None for a PEP-420 namespace package.
        :raises KeyError: if the archive has no entry with the given name.
        :raises ImportError: if the entry's data cannot be decompressed or unmarshaled.
        :raises SystemExit: if the archive file does not exist anymore.
        """
        # Look up entry
        entry = self.toc.get(name)
        if entry is None:
            raise KeyError(f"No entry named {name!r} found in the archive!")

        typecode, entry_offset, entry_length = entry

        # PEP-420 namespace package does not have a data blob.
        if typecode == PYZ_ITEM_NSPKG:
            return None

        # Read data blob
        try:
            with open(self._filename, "rb") as fp:
                fp.seek(self._start_offset + entry_offset)
                obj = fp.read(entry_length)
        except FileNotFoundError:
            # We open the archive file each time we need to read from it, to avoid locking the file by keeping it open.
            # This allows executable to be deleted or moved (renamed) while it is running, which is useful in certain
            # scenarios (e.g., automatic update that replaces the executable). The caveat is that once the executable is
            # renamed, we cannot read from its embedded PYZ archive anymore. In such case, exit with informative
            # message.
            raise SystemExit(
                f"ERROR: {self._filename} appears to have been moved or deleted since this application was launched. "
                "Continuation from this state is impossible. Exiting now."
            )

        # A corrupted or truncated blob makes zlib raise zlib.error; surface it as ImportError, same as other
        # unusable entries.
        try:
            obj = zlib.decompress(obj)
        except zlib.error as e:
            raise ImportError(f"Failed to decompress PYZ entry {name!r}!") from e

        if typecode in (PYZ_ITEM_MODULE, PYZ_ITEM_PKG) and not raw:
            # marshal.loads() signals invalid data with EOFError, ValueError, or TypeError; handle all three.
            try:
                obj = marshal.loads(obj)
            except (EOFError, ValueError, TypeError) as e:
                raise ImportError(f"Failed to unmarshal PYZ entry {name!r}!") from e

        return obj