File: test_bytecode.py

package info (click to toggle)
pyinstaller 6.18.0%2Bds-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 11,820 kB
  • sloc: python: 41,828; ansic: 12,123; makefile: 171; sh: 131; xml: 19
file content (278 lines) | stat: -rw-r--r-- 10,053 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
#-----------------------------------------------------------------------------
# Copyright (c) 2021-2023, PyInstaller Development Team.
#
# Distributed under the terms of the GNU General Public License (version 2
# or later) with exception for distributing the bootloader.
#
# The full license is in the file COPYING.txt, distributed with this software.
#
# SPDX-License-Identifier: (GPL-2.0-or-later WITH Bootloader-exception)
#-----------------------------------------------------------------------------
"""
Tests for PyInstaller.depend.bytecode
"""

import re
from types import CodeType
from textwrap import dedent, indent
import operator

from PyInstaller import compat
from PyInstaller.depend.bytecode import (
    function_calls,
    recursive_function_calls,
    any_alias,
    finditer,
    _cleanup_bytecode_string,  # used for sanity check in test_finditer()
)


def compile_(x):
    """
    Strip the common leading indentation from **x** and compile the result as module-level ("exec") code.
    """
    source = dedent(x)
    return compile(source, "<no file>", "exec")


def many_int_constants():
    """
    Generate Python code consisting of 600 assignments of distinct integers, i.e., with >256 integer constants.
    """
    # NOTE: in python >= 3.14.0a2, integer arguments smaller than 256 are pushed directly to stack, without using a
    # co_consts. Therefore, to effectively use >256 constants, we need to generate >512 integer arguments.
    assignments = [f'a = {value}\n' for value in range(600)]
    return "".join(assignments)


def many_str_constants():
    """
    Generate Python code consisting of 300 assignments of distinct strings, i.e., with >256 string constants.
    """
    assignments = [f'a = "val_{index}"\n' for index in range(300)]
    return "".join(assignments)


def many_globals():
    """
    Generate Python code that includes >256 global identifiers.

    The result is a single chained assignment of the string 'hello' to 300 distinct names (a_0 ... a_299).
    """
    names = [f"a_{index}" for index in range(300)]
    targets = " = ".join(names)
    return f"{targets} = 'hello'\n"


def many_arguments():
    """
    Generate a call to foo() that takes >256 arguments (the integers 0 through 299).
    """
    arguments = ", ".join(str(value) for value in range(300))
    return f'foo({arguments})\n'


def in_a_function(body):
    """
    Wrap **body** into the definition of a function called function().

    The body is indented by four spaces and a trailing newline is appended.
    """
    indented_body = indent(body, "    ")
    return f"def function():\n{indented_body}\n"


# Sanity checks: make sure that no fancy bytecode optimisation causes the code produced by any of the above generator
# functions to be automatically removed as redundant by the compiler.


def test_many_int_constants():
    """
    Check that the code generated by many_int_constants() ends up with the expected number of constants.
    """
    code: CodeType = compile_(many_int_constants())

    # The only name in use is the variable 'a'.
    assert code.co_names == ('a',)

    constant_count = len(code.co_consts)

    # In python versions before 3.14, co_consts is used for all constants.
    if not compat.is_py314:
        # 600 integers plus a 'None' return.
        assert constant_count == 601
        return

    # In python >= 3.14.0a2, LOAD_SMALL_INT instruction is used to push integers smaller than 256 on the stack, and
    # co_consts is used only for larger values (in combination with LOAD_CONST / LOAD_CONST_IMMORTAL).
    expected_count = 601 - 256  # (600 - 256) integers plus a `None` return.

    # In 3.14.0a7 the behavior was changed (by 55815a6); it seems that the value of the very first LOAD_SMALL_INT is
    # added to co_consts for some reason. This does not happen if there is a preceding LOAD_CONST (e.g., if there is
    # a docstring present before the code). In case this behavior change was unintended or is changed further, check
    # whether co_consts contains 0 at the first index and adjust the expected length accordingly.
    if code.co_consts[0] == 0:
        expected_count += 1

    assert constant_count == expected_count


def test_many_str_constants():
    """
    Check that the code generated by many_str_constants() keeps all of its string constants.
    """
    code: CodeType = compile_(many_str_constants())
    names, constants = code.co_names, code.co_consts

    # The only name in use is the variable 'a'.
    assert names == ('a',)

    # 300 string constants plus a 'None' return.
    assert len(constants) == 301


def test_many_globals():
    """
    Check that the code generated by many_globals() keeps all 300 names and has exactly two constants.
    """
    code: CodeType = compile_(many_globals())
    name_count, constant_count = len(code.co_names), len(code.co_consts)
    assert name_count == 300
    assert constant_count == 2


def test_global_functions():
    """
    Test that function_calls() finds function calls made in the global namespace.
    """
    def calls_in(source):
        # Compile a source snippet and collect the function calls found in its top-level code object.
        return function_calls(compile_(source))

    # The simplest possible function call, followed by one with an argument.
    assert calls_in("foo()") == [('foo', [])]
    assert calls_in("foo('a')") == [('foo', ['a'])]

    # Having >256 constants will take us into extended arg territory where multiple byte-pair instructions are needed
    # to reference the constant; >256 global names similarly require special handling. If everything works, we should
    # not notice the difference.
    extended_arg_cases = (
        (many_int_constants() + "foo(.123)", [('foo', [.123])]),
        (many_str_constants() + "foo(.321)", [('foo', [.321])]),
        (many_globals() + "foo(.456)", [('foo', [.456])]),
    )
    for source, expected in extended_arg_cases:
        assert calls_in(source) == expected

    # And the unlikely case of >256 arguments to one function call.
    #
    # NOTE: with python >= 3.14.0a5, this creates a list with a sequence of BUILD_LIST, LOAD*, LIST_APPEND opcodes,
    # followed by a CALL_INTRINSIC_1 opcode with INTRINSIC_LIST_TO_TUPLE argument, and CALL_FUNCTION_EX opcode.
    #
    # Since we have no real use case for such lists, perform the test only on earlier python versions.
    if not compat.is_py314:
        assert calls_in(many_arguments()) == [('foo', list(range(300)))]

    # For loops and if statements should work. The iterable in a comprehension loop works, but the statement to be
    # executed repeatedly gets its own code object and therefore requires recursion (tested later).
    loop_calls = calls_in(
        """
        for i in foo(1, 2):
            a = bar(3)
            if wop(4) > whip(5):
                whiz(6)
                [7 for i in whallop(8)]
        """
    )
    assert loop_calls == [
        ("foo", [1, 2]),
        ("bar", [3]),
        ("wop", [4]),
        ("whip", [5]),
        ("whiz", [6]),
        ("whallop", [8]),
    ]

    # These are not supported but should be silently ignored without unintentional errors:
    assert calls_in("foo(x)") == []
    assert calls_in("foo(a='3')") == []
    assert calls_in("foo(bar())") == [('bar', [])]

    # Python's compiler evaluates arithmetic. However, I will not bank on this being guaranteed behaviour, so an
    # empty result is accepted as well.
    folded = calls_in("foo(1 + 1)")
    assert not folded or folded == [("foo", [2])]

    # Dotted (attribute) names are reported in full.
    assert calls_in("foo.bar()") == [("foo.bar", [])]
    assert calls_in("foo.bar.pop.whack('a', 'b')") == [("foo.bar.pop.whack", ['a', 'b'])]


def test_nested_codes():
    """
    Test function_calls() on global functions in nested code objects (bodies of other functions).
    """
    def nested_codes(parent):
        # Lazily yield the code objects stored among the constants of the given code object.
        return (const for const in parent.co_consts if isinstance(const, CodeType))

    # The following compile() creates 3 code objects:
    #   - The global code.
    #   - The contents of foo().
    #   - The body of the embedded lambda.
    code = compile_(
        """
        def foo():
            bar()
            whoop = lambda : fizz(3)
            return range(10)
        """
    )
    foo_calls = [('bar', []), ('range', [10])]
    lambda_calls = [('fizz', [3])]

    # There are no function calls in the global code.
    assert function_calls(code) == []

    # Get the body of foo(); the unpacking requires that the global code holds exactly one nested code object.
    [foo_code] = nested_codes(code)
    # foo() itself calls bar() and range(10).
    assert function_calls(foo_code) == foo_calls

    # Get the body of the embedded lambda, which contains fizz(3).
    lambda_code = next(nested_codes(foo_code))
    assert function_calls(lambda_code) == lambda_calls

    # The recursive variant reports the calls of every code object, keyed by the code object itself.
    assert recursive_function_calls(code) == {
        code: [],
        foo_code: foo_calls,
        lambda_code: lambda_calls,
    }


def test_local_functions():
    """
    Test on purely local functions, i.e., functions that are imported and called inside the body of another function.
    """
    module_code = compile_(
        in_a_function(
            """
            a = 3
            import foo, zap
            zap.pop(), foo.bar()
            """
        )
    )

    # The module-level code must hold exactly one nested code object: the body of function().
    code: CodeType
    [code] = (const for const in module_code.co_consts if isinstance(const, CodeType))

    # This test may mistakenly pass if co_names and co_varnames can be mixed up, so make sure that no index `i`
    # exists for which co_names[i] == co_varnames[i].
    assert not any(map(operator.eq, code.co_names, code.co_varnames))

    expected_calls = [('zap.pop', []), ('foo.bar', [])]
    assert function_calls(code) == expected_calls


def test_any_alias():
    """
    Test that any_alias() yields the full dotted name followed by each of its shorter trailing variants.
    """
    aliases = tuple(any_alias("foo.bar.pop"))
    assert aliases == ("foo.bar.pop", "bar.pop", "pop")


def test_finditer():
    """
    Test that bytecode.finditer() only yields matches that start on an even byte (``match.start() % 2 == 0``).

    There are 3 permutations to consider for a match:
    - A match starts on an even byte:
        That's good! Include that sequence.
    - A single character match starts on an odd byte:
        Ignore it. It's a false positive.
    - A multi-character match starts on an odd byte:
        This match is a false positive, but there may be a genuine match shortly afterwards (in the test below, it
        starts at the very next character) which overlaps with this one. Regex's behaviour of ignoring overlapping
        matches must therefore be overridden to prevent such matches from getting lost.
    """

    # Runs of digits, separated by 0xFF.
    sample_string = b"0123\xFF4567\xFF890\xFF12\xFF3\xFF4"

    # Sanity check - ensure that none of the characters in the sample string coincide with the opcodes that
    # `finditer()` filters out via its call to `_cleanup_bytecode_string()` (e.g., CACHE, PUSH_NULL). If that ever
    # happens, a new separator needs to be picked to avoid disturbing the test. For example, the original separator
    # was the space character, but its ordinal code (32) coincides with the PUSH_NULL opcode in python 3.14.0a7.
    cleaned = _cleanup_bytecode_string(sample_string)
    assert sample_string == cleaned, "One of characters in input string coincides with filtered-out opcode!"

    digits = re.compile(rb"\d+")
    aligned = [match.group() for match in finditer(digits, sample_string)]
    assert aligned == [b"0123", b"567", b"890", b"12"]