File: gchead-alignment.diff

package info (click to toggle)
python3.14 3.14.0-5
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 169,680 kB
  • sloc: python: 751,968; ansic: 717,163; xml: 31,250; sh: 5,989; cpp: 4,063; makefile: 1,995; objc: 787; lisp: 502; javascript: 136; asm: 75; csh: 12
file content (364 lines) | stat: -rw-r--r-- 15,056 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
From 49d72365cd2d6c09a154a9a061efef4130e2c758 Mon Sep 17 00:00:00 2001
From: Petr Viktorin <encukou@gmail.com>
Date: Wed, 11 Jun 2025 12:44:58 +0200
Subject: [PATCH] gh-127545: Add _Py_ALIGNED_DEF(N, T) and use it for PyObject
 (GH-135209)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* Replace _Py_ALIGN_AS(V) by _Py_ALIGNED_DEF(N, T)

This is now a common façade for the various `_Alignas` alternatives,
which behave in interesting ways -- see the source comment.

The new macro (and MSVC's `__declspec(align)`) should not be used
on a variable/member declaration that includes a struct declaration.
A workaround is to separate the struct definition.
Do that for `PyASCIIObject.state`.

* Specify minimum PyGC_Head and PyObject alignment

As documented in InternalDocs/garbage_collector.md, the garbage collector
stores flags in the least significant two bits of the _gc_prev pointer
in struct PyGC_Head. Consequently, this pointer is only capable of storing
a location that's aligned to a 4-byte boundary.

Encode this requirement using _Py_ALIGNED_DEF.

This patch fixes a segfault in m68k, which was previously investigated
by Adrian Glaubitz here:
https://lists.debian.org/debian-68k/2024/11/msg00020.html
https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=1087600
Original patch (using the GCC-only Py_ALIGNED) by Finn Thain.

Co-authored-by: Finn Thain <fthain@linux-m68k.org>
Co-authored-by: Victor Stinner <vstinner@python.org>
Co-authored-by: John Paul Adrian Glaubitz <glaubitz@physik.fu-berlin.de>

Origin: Upstream, https://github.com/python/cpython/commit/49d72365cd2d6c09a154a9a061efef4130e2c758
Bug-Upstream: https://github.com/python/cpython/issues/127545
Bug-Debian: https://bugs.debian.org/1105110
---
 Include/Python.h                              |  13 --
 Include/cpython/unicodeobject.h               | 120 +++++++++---------
 Include/internal/pycore_interp_structs.h      |   3 +-
 Include/object.h                              |   9 +-
 Include/pymacro.h                             |  82 +++++++-----
 ...-12-04-10-00-35.gh-issue-127545.t0THjE.rst |   1 +
 6 files changed, 122 insertions(+), 106 deletions(-)
 create mode 100644 Misc/NEWS.d/next/Build/2024-12-04-10-00-35.gh-issue-127545.t0THjE.rst

--- a/Include/Python.h
+++ b/Include/Python.h
@@ -60,14 +60,6 @@
 #  include <intrin.h>             // __readgsqword()
 #endif
 
-// Suppress known warnings in Python header files.
-#if defined(_MSC_VER)
-// Warning that alignas behaviour has changed. Doesn't affect us, because we
-// never relied on the old behaviour.
-#pragma warning(push)
-#pragma warning(disable: 5274)
-#endif
-
 // Include Python header files
 #include "pyport.h"
 #include "pymacro.h"
@@ -147,9 +139,4 @@
 #include "cpython/pyfpe.h"
 #include "cpython/tracemalloc.h"
 
-// Restore warning filter
-#ifdef _MSC_VER
-#pragma warning(pop)
-#endif
-
 #endif /* !Py_PYTHON_H */
--- a/Include/cpython/unicodeobject.h
+++ b/Include/cpython/unicodeobject.h
@@ -47,6 +47,63 @@ static inline Py_UCS4 Py_UNICODE_LOW_SUR
 
 /* --- Unicode Type ------------------------------------------------------- */
 
+struct _PyUnicodeObject_state {
+    /* If interned is non-zero, the two references from the
+       dictionary to this object are *not* counted in ob_refcnt.
+       The possible values here are:
+           0: Not Interned
+           1: Interned
+           2: Interned and Immortal
+           3: Interned, Immortal, and Static
+       This categorization allows the runtime to determine the right
+       cleanup mechanism at runtime shutdown. */
+#ifdef Py_GIL_DISABLED
+    // Needs to be accessed atomically, so can't be a bit field.
+    unsigned char interned;
+#else
+    unsigned int interned:2;
+#endif
+    /* Character size:
+
+       - PyUnicode_1BYTE_KIND (1):
+
+         * character type = Py_UCS1 (8 bits, unsigned)
+         * all characters are in the range U+0000-U+00FF (latin1)
+         * if ascii is set, all characters are in the range U+0000-U+007F
+         (ASCII), otherwise at least one character is in the range
+         U+0080-U+00FF
+
+       - PyUnicode_2BYTE_KIND (2):
+
+         * character type = Py_UCS2 (16 bits, unsigned)
+         * all characters are in the range U+0000-U+FFFF (BMP)
+         * at least one character is in the range U+0100-U+FFFF
+
+       - PyUnicode_4BYTE_KIND (4):
+
+         * character type = Py_UCS4 (32 bits, unsigned)
+         * all characters are in the range U+0000-U+10FFFF
+         * at least one character is in the range U+10000-U+10FFFF
+       */
+    unsigned int kind:3;
+    /* Compact is with respect to the allocation scheme. Compact unicode
+       objects only require one memory block while non-compact objects use
+       one block for the PyUnicodeObject struct and another for its data
+       buffer. */
+    unsigned int compact:1;
+    /* The string only contains characters in the range U+0000-U+007F (ASCII)
+       and the kind is PyUnicode_1BYTE_KIND. If ascii is set and compact is
+       set, use the PyASCIIObject structure. */
+    unsigned int ascii:1;
+    /* The object is statically allocated. */
+    unsigned int statically_allocated:1;
+#ifndef Py_GIL_DISABLED
+    /* Historical: padding to ensure that PyUnicode_DATA() is always aligned to
+       4 bytes (see issue gh-63736 on m68k) */
+    unsigned int :24;
+#endif
+};
+
 /* ASCII-only strings created through PyUnicode_New use the PyASCIIObject
    structure. state.ascii and state.compact are set, and the data
    immediately follow the structure. utf8_length can be found
@@ -99,67 +156,8 @@ typedef struct {
     PyObject_HEAD
     Py_ssize_t length;          /* Number of code points in the string */
     Py_hash_t hash;             /* Hash value; -1 if not set */
-#ifdef Py_GIL_DISABLED
-    /* Ensure 4 byte alignment for PyUnicode_DATA(), see gh-63736 on m68k.
-       In the non-free-threaded build, we'll use explicit padding instead */
-   _Py_ALIGN_AS(4)
-#endif
-    struct {
-        /* If interned is non-zero, the two references from the
-           dictionary to this object are *not* counted in ob_refcnt.
-           The possible values here are:
-               0: Not Interned
-               1: Interned
-               2: Interned and Immortal
-               3: Interned, Immortal, and Static
-           This categorization allows the runtime to determine the right
-           cleanup mechanism at runtime shutdown. */
-#ifdef Py_GIL_DISABLED
-        // Needs to be accessed atomically, so can't be a bit field.
-        unsigned char interned;
-#else
-        unsigned int interned:2;
-#endif
-        /* Character size:
-
-           - PyUnicode_1BYTE_KIND (1):
-
-             * character type = Py_UCS1 (8 bits, unsigned)
-             * all characters are in the range U+0000-U+00FF (latin1)
-             * if ascii is set, all characters are in the range U+0000-U+007F
-               (ASCII), otherwise at least one character is in the range
-               U+0080-U+00FF
-
-           - PyUnicode_2BYTE_KIND (2):
-
-             * character type = Py_UCS2 (16 bits, unsigned)
-             * all characters are in the range U+0000-U+FFFF (BMP)
-             * at least one character is in the range U+0100-U+FFFF
-
-           - PyUnicode_4BYTE_KIND (4):
-
-             * character type = Py_UCS4 (32 bits, unsigned)
-             * all characters are in the range U+0000-U+10FFFF
-             * at least one character is in the range U+10000-U+10FFFF
-         */
-        unsigned int kind:3;
-        /* Compact is with respect to the allocation scheme. Compact unicode
-           objects only require one memory block while non-compact objects use
-           one block for the PyUnicodeObject struct and another for its data
-           buffer. */
-        unsigned int compact:1;
-        /* The string only contains characters in the range U+0000-U+007F (ASCII)
-           and the kind is PyUnicode_1BYTE_KIND. If ascii is set and compact is
-           set, use the PyASCIIObject structure. */
-        unsigned int ascii:1;
-        /* The object is statically allocated. */
-        unsigned int statically_allocated:1;
-#ifndef Py_GIL_DISABLED
-        /* Padding to ensure that PyUnicode_DATA() is always aligned to
-           4 bytes (see issue gh-63736 on m68k) */
-        unsigned int :24;
-#endif
-    } state;
+    /* Ensure 4 byte alignment for PyUnicode_DATA(), see gh-63736 on m68k. */
+   _Py_ALIGNED_DEF(4, struct _PyUnicodeObject_state) state;
 } PyASCIIObject;
 
 /* Non-ASCII strings allocated through PyUnicode_New use the
--- a/Include/internal/pycore_interp_structs.h
+++ b/Include/internal/pycore_interp_structs.h
@@ -159,10 +159,11 @@ struct atexit_state {
 typedef struct {
     // Tagged pointer to next object in the list.
     // 0 means the object is not tracked
-    uintptr_t _gc_next;
+    _Py_ALIGNED_DEF(_PyObject_MIN_ALIGNMENT, uintptr_t) _gc_next;
 
     // Tagged pointer to previous object in the list.
     // Lowest two bits are used for flags documented later.
+    // Those bits are made available by the struct's minimum alignment.
     uintptr_t _gc_prev;
 } PyGC_Head;
 
--- a/Include/object.h
+++ b/Include/object.h
@@ -101,6 +101,12 @@ whose size is determined when the object
 #define PyObject_VAR_HEAD      PyVarObject ob_base;
 #define Py_INVALID_SIZE (Py_ssize_t)-1
 
+/* PyObjects are given a minimum alignment so that the least significant bits
+ * of an object pointer become available for other purposes.
+ * This must be an integer literal with the value (1 << _PyGC_PREV_SHIFT), number of bytes.
+ */
+#define _PyObject_MIN_ALIGNMENT 4
+
 /* Nothing is actually declared to be a PyObject, but every pointer to
  * a Python object can be cast to a PyObject*.  This is inheritance built
  * by hand.  Similarly every pointer to a variable-size Python object can,
@@ -136,6 +142,7 @@ struct _object {
 #else
         Py_ssize_t ob_refcnt;
 #endif
+        _Py_ALIGNED_DEF(_PyObject_MIN_ALIGNMENT, char) _aligner;
     };
 #ifdef _MSC_VER
     __pragma(warning(pop))
@@ -153,7 +160,7 @@ struct _object {
     // ob_tid stores the thread id (or zero). It is also used by the GC and the
     // trashcan mechanism as a linked list pointer and by the GC to store the
     // computed "gc_refs" refcount.
-    uintptr_t ob_tid;
+    _Py_ALIGNED_DEF(_PyObject_MIN_ALIGNMENT, uintptr_t) ob_tid;
     uint16_t ob_flags;
     PyMutex ob_mutex;           // per-object lock
     uint8_t ob_gc_bits;         // gc-related state
--- a/Include/pymacro.h
+++ b/Include/pymacro.h
@@ -24,44 +24,66 @@
 #endif
 
 
-// _Py_ALIGN_AS: this compiler's spelling of `alignas` keyword,
-// We currently use alignas for free-threaded builds only; additional compat
-// checking would be great before we add it to the default build.
-// Standards/compiler support:
+// _Py_ALIGNED_DEF(N, T): Define a variable/member with increased alignment
+//
+// `N`: the desired minimum alignment, an integer literal, number of bytes
+// `T`: the type of the defined variable
+//      (or a type with at least the defined variable's alignment)
+//
+// May not be used on a struct definition.
+//
+// Standards/compiler support for `alignas` alternatives:
 // - `alignas` is a keyword in C23 and C++11.
 // - `_Alignas` is a keyword in C11
 // - GCC & clang has __attribute__((aligned))
 //   (use that for older standards in pedantic mode)
 // - MSVC has __declspec(align)
 // - `_Alignas` is common C compiler extension
-// Older compilers may name it differently; to allow compilation on such
-// unsupported platforms, we don't redefine _Py_ALIGN_AS if it's already
+// Older compilers may name `alignas` differently; to allow compilation on such
+// unsupported platforms, we don't redefine _Py_ALIGNED_DEF if it's already
 // defined. Note that defining it wrong (including defining it to nothing) will
 // cause ABI incompatibilities.
-#ifdef Py_GIL_DISABLED
-#   ifndef _Py_ALIGN_AS
-#       ifdef __cplusplus
-#           if __cplusplus >= 201103L
-#               define _Py_ALIGN_AS(V) alignas(V)
-#           elif defined(__GNUC__) || defined(__clang__)
-#               define _Py_ALIGN_AS(V) __attribute__((aligned(V)))
-#           elif defined(_MSC_VER)
-#               define _Py_ALIGN_AS(V) __declspec(align(V))
-#           else
-#               define _Py_ALIGN_AS(V) alignas(V)
-#           endif
-#       elif defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L
-#           define _Py_ALIGN_AS(V) alignas(V)
-#       elif defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L
-#           define _Py_ALIGN_AS(V) _Alignas(V)
-#       elif (defined(__GNUC__) || defined(__clang__))
-#           define _Py_ALIGN_AS(V) __attribute__((aligned(V)))
-#       elif defined(_MSC_VER)
-#           define _Py_ALIGN_AS(V) __declspec(align(V))
-#       else
-#           define _Py_ALIGN_AS(V) _Alignas(V)
-#       endif
-#   endif
+//
+// Behavior of `alignas` alternatives:
+// - `alignas` & `_Alignas`:
+//   - Can be used multiple times; the greatest alignment applies.
+//   - It is an *error* if the combined effect of all `alignas` modifiers would
+//     decrease the alignment.
+//   - Takes types or numbers.
+//   - May not be used on a struct definition, unless also defining a variable.
+// - `__declspec(align)`:
+//   - Has no effect if it would decrease alignment.
+//   - Only takes an integer literal.
+//   - May be used on struct or variable definitions.
+//     However, when defining both the struct and the variable at once,
+//     `__declspec(align)` causes compiler warning 5274 and possible ABI
+//     incompatibility.
+// - `__attribute__((aligned))`:
+//   - Has no effect if it would decrease alignment.
+//   - Takes types or numbers.
+//   - May be used on struct or variable definitions.
+#ifndef _Py_ALIGNED_DEF
+#    ifdef __cplusplus
+#        if __cplusplus >= 201103L
+#            define _Py_ALIGNED_DEF(N, T) alignas(N) alignas(T) T
+#        elif defined(__GNUC__) || defined(__clang__)
+#            define _Py_ALIGNED_DEF(N, T) __attribute__((aligned(N))) T
+#        elif defined(_MSC_VER)
+#            define _Py_ALIGNED_DEF(N, T) __declspec(align(N)) T
+#        else
+#            define _Py_ALIGNED_DEF(N, T) alignas(N) alignas(T) T
+#        endif
+#    elif defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L
+#        define _Py_ALIGNED_DEF(N, T) alignas(N) alignas(T) T
+#    elif defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L
+#        define _Py_ALIGNED_DEF(N, T)  _Alignas(N) _Alignas(T) T
+#    elif (defined(__GNUC__) || defined(__clang__))
+#        define _Py_ALIGNED_DEF(N, T) __attribute__((aligned(N))) T
+#    elif defined(_MSC_VER)
+#        define _Py_ALIGNED_DEF(N, T) __declspec(align(N)) T
+#    else
+#        define _Py_ALIGNED_DEF(N, T) _Alignas(N) _Alignas(T) T
+#    endif
 #endif
 
 /* Minimum value between x and y */
--- /dev/null
+++ b/Misc/NEWS.d/next/Build/2024-12-04-10-00-35.gh-issue-127545.t0THjE.rst
@@ -0,0 +1 @@
+Fix crash when building on Linux/m68k.