File: test_bytecode.py

package info (click to toggle)
pyinstaller 6.13.0%2Bds-2
  • links: PTS, VCS
  • area: main
  • in suites: trixie
  • size: 11,520 kB
  • sloc: python: 41,347; ansic: 11,334; makefile: 176; sh: 136; xml: 19
file content (222 lines) | stat: -rw-r--r-- 7,151 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
#-----------------------------------------------------------------------------
# Copyright (c) 2021-2023, PyInstaller Development Team.
#
# Distributed under the terms of the GNU General Public License (version 2
# or later) with exception for distributing the bootloader.
#
# The full license is in the file COPYING.txt, distributed with this software.
#
# SPDX-License-Identifier: (GPL-2.0-or-later WITH Bootloader-exception)
#-----------------------------------------------------------------------------
"""
Tests for PyInstaller.depend.bytecode
"""

import re
from types import CodeType
from textwrap import dedent, indent
import operator

from PyInstaller.depend.bytecode import (
    function_calls,
    recursive_function_calls,
    any_alias,
    finditer,
)


def compile_(x):
    """
    Dedent the source string **x** and compile it in 'exec' mode under the placeholder filename ``<no file>``.
    """
    source = dedent(x)
    return compile(source, "<no file>", "exec")


def many_constants():
    """
    Build Python source that assigns 300 different integers to ``a``, i.e., code containing >256 constants.
    """
    assignments = [f'a = {i}\n' for i in range(300)]
    return "".join(assignments)


def many_globals():
    """
    Build Python source that references >256 global identifiers: one chained assignment binding a_0 ... a_299.
    """
    # The assigned value is appended as the final link of the ' = ' chain.
    chain = [f"a_{i}" for i in range(300)]
    chain.append("'hello'\n")
    return " = ".join(chain)


def many_arguments():
    """
    Build Python source for a single call to foo() that passes >256 (here: 300) integer arguments.
    """
    arguments = ", ".join(str(number) for number in range(300))
    return 'foo({})\n'.format(arguments)


def in_a_function(body):
    """
    Wrap **body** in the definition of a function named function(), indenting it by four spaces.
    """
    header = "def function():\n"
    indented_body = indent(body, "    ")
    return "".join((header, indented_body, "\n"))


# Sanity check that no fancy bytecode optimisation causes the code generated by many_constants() or many_globals() to
# be automatically removed as redundant by the compiler.


def test_many_constants():
    """
    Check that the code generated by many_constants() keeps all of its constants once compiled.
    """
    compiled: CodeType = compile_(many_constants())
    # Every generated assignment targets the same variable, so 'a' is the only name.
    assert compiled.co_names == ('a',)

    # 300 integers (one per generated assignment) plus a 'None' return.
    assert len(compiled.co_consts) == 301


def test_many_globals():
    """
    Check that the code generated by many_globals() keeps all of its global names once compiled.
    """
    compiled: CodeType = compile_(many_globals())
    name_count = len(compiled.co_names)
    constant_count = len(compiled.co_consts)
    # One name per generated a_<i> identifier.
    assert name_count == 300
    # Presumably the 'hello' string plus the implicit 'None' return.
    assert constant_count == 2


def test_global_functions():
    """
    Check that function_calls() finds the function calls made from the global namespace.
    """
    def calls_in(source):
        # Compile **source** and list the function calls found in its top-level code object.
        return function_calls(compile_(source))

    # A bare call without arguments - the simplest case there is.
    assert calls_in("foo()") == [('foo', [])]

    # A call with a constant argument.
    assert calls_in("foo('a')") == [('foo', ['a'])]

    # With >256 constants, referencing a constant needs extended args (multiple byte-pair instructions). If all is
    # well, the result looks no different from the simple case.
    assert calls_in(many_constants() + "foo(.123)") == [('foo', [.123])]

    # Having >256 global names needs the same kind of special handling.
    assert calls_in(many_globals() + "foo(.456)") == [('foo', [.456])]

    # Unlikely in practice: a single call that takes >256 arguments.
    assert calls_in(many_arguments()) == [('foo', list(range(300)))]

    # Calls inside for loops and if statements should be found. For a comprehension, only the iterable is part of this
    # code object; the repeated expression gets a code object of its own and so needs recursion (tested later).
    loop_calls = calls_in(
        """
        for i in foo(1, 2):
            a = bar(3)
            if wop(4) > whip(5):
                whiz(6)
                [7 for i in whallop(8)]
        """
    )
    assert loop_calls == [
        ("foo", [1, 2]),
        ("bar", [3]),
        ("wop", [4]),
        ("whip", [5]),
        ("whiz", [6]),
        ("whallop", [8]),
    ]

    # Unsupported forms (variable arguments, keyword arguments, a call used as an argument) must be skipped silently
    # instead of raising. The inner bar() of the last example takes no arguments, so it is still reported.
    assert calls_in("foo(x)") == []
    assert calls_in("foo(a='3')") == []
    assert calls_in("foo(bar())") == [('bar', [])]

    # The compiler folds constant arithmetic, but that is not treated as guaranteed behaviour here: the result is only
    # checked if the call was detected at all.
    folded = calls_in("foo(1 + 1)")
    if folded:
        assert folded == [("foo", [2])]

    # Attribute (dotted) calls are reported under their full dotted name.
    assert calls_in("foo.bar()") == [("foo.bar", [])]
    assert calls_in("foo.bar.pop.whack('a', 'b')") == [("foo.bar.pop.whack", ['a', 'b'])]


def test_nested_codes():
    """
    Check function_calls() and recursive_function_calls() on code objects nested inside other code objects (the bodies
    of functions and lambdas).
    """

    # Compiling the snippet below creates 3 code objects:
    #   - The global code.
    #   - The body of foo().
    #   - The body of the lambda embedded in foo().

    module_code = compile_(
        """
        def foo():
            bar()
            whoop = lambda : fizz(3)
            return range(10)
        """
    )
    # The global code only defines foo(); it makes no calls itself.
    assert function_calls(module_code) == []

    # The body of foo() must be the one and only code object among the global code's constants.
    [foo_code] = [const for const in module_code.co_consts if isinstance(const, CodeType)]
    # foo() itself calls bar() and range(10); fizz(3) belongs to the lambda's own code object.
    expected_in_foo = [('bar', []), ('range', [10])]
    assert function_calls(foo_code) == expected_in_foo

    # The lambda's body is the first code object among the constants of foo().
    lambda_code = next(const for const in foo_code.co_consts if isinstance(const, CodeType))
    expected_in_lambda = [('fizz', [3])]
    assert function_calls(lambda_code) == expected_in_lambda

    # The recursive variant reports all three code objects, keyed by the code objects themselves.
    assert recursive_function_calls(module_code) == {
        module_code: [],
        foo_code: expected_in_foo,
        lambda_code: expected_in_lambda,
    }


def test_local_functions():
    """
    Check function_calls() on purely local functions, i.e., functions that were imported and then called inside the
    body of another function.
    """
    module_code = compile_(
        in_a_function(
            """
            a = 3
            import foo, zap
            zap.pop(), foo.bar()
            """
        )
    )

    # The body of function() must be the one and only code object among the global code's constants.
    function_code: CodeType
    [function_code] = [const for const in module_code.co_consts if isinstance(const, CodeType)]

    # If co_names and co_varnames could be mixed up, this test might pass by accident. Rule that out by requiring the
    # two tuples to differ at every index they share.
    paired_names = zip(function_code.co_names, function_code.co_varnames)
    assert all(name != varname for name, varname in paired_names)

    assert function_calls(function_code) == [('zap.pop', []), ('foo.bar', [])]


def test_any_alias():
    """
    Check that any_alias() yields the full dotted name followed by each of its shorter trailing parts, in that order.
    """
    aliases = tuple(any_alias("foo.bar.pop"))
    assert aliases == ("foo.bar.pop", "bar.pop", "pop")


def test_finditer():
    """
    Check that bytecode.finditer() only yields matches that start on an even byte (``match.start() % 2 == 0``).

    A candidate match falls into one of 3 cases:
    - It starts on an even byte:
        That's good! The sequence is included.
    - It is a single character long and starts on an odd byte:
        It is a false positive and is ignored.
    - It is several characters long and starts on an odd byte:
        It is a false positive too, but a genuine match overlapping it may begin shortly afterwards (in the test
        below, on the very next character). Regex normally ignores overlapping matches, so that behaviour must be
        overridden to keep the genuine match from getting lost.
    """
    digits = re.compile(rb"\d+")
    data = b"0123 4567 890 12 3 4"
    aligned = [match.group() for match in finditer(digits, data)]
    assert aligned == [b"0123", b"567", b"890", b"12"]