From 9c93b4607adcf9b3efd53aba43e2d33bf5aef9df Mon Sep 17 00:00:00 2001
From: Jussi Kivilinna <jussi.kivilinna@iki.fi>
Date: Sun, 4 Aug 2024 18:04:49 +0300
Subject: [PATCH] mpi/ec-inline: reduce register pressure on 32-bit ARM

* mpi/ec-inline.h [HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS] (ADD4_LIMB32)
(ADD6_LIMB32, SUB4_LIMB32, SUB6_LIMB32): Reuse input registers
as output (use just two unique operands).
--
This fixes building ec-nist.c with GCC-14 on 32-bit ARM.
GnuPG-bug-id: 7226
Signed-off-by: Jussi Kivilinna <jussi.kivilinna@iki.fi>
---
mpi/ec-inline.h | 63 ++++++++++++++++++++++++-------------------------
1 file changed, 31 insertions(+), 32 deletions(-)
diff --git a/mpi/ec-inline.h b/mpi/ec-inline.h
index c24d5352..3a526246 100644
--- a/mpi/ec-inline.h
+++ b/mpi/ec-inline.h
@@ -832,89 +832,88 @@ LIMB64_HILO(mpi_limb_t hi, mpi_limb_t lo)
#endif /* __i386__ */
/* ARM addition/subtraction helpers. */
#ifdef HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS
#define ADD4_LIMB32(A3, A2, A1, A0, B3, B2, B1, B0, C3, C2, C1, C0) \
- __asm__ ("adds %3, %7, %11\n" \
- "adcs %2, %6, %10\n" \
- "adcs %1, %5, %9\n" \
- "adc %0, %4, %8\n" \
+ __asm__ ("adds %3, %3, %11\n" \
+ "adcs %2, %2, %10\n" \
+ "adcs %1, %1, %9\n" \
+ "adc %0, %0, %8\n" \
: "=r" (A3), \
"=&r" (A2), \
"=&r" (A1), \
"=&r" (A0) \
- : "r" ((mpi_limb_t)(B3)), \
- "r" ((mpi_limb_t)(B2)), \
- "r" ((mpi_limb_t)(B1)), \
- "r" ((mpi_limb_t)(B0)), \
+ : "0" ((mpi_limb_t)(B3)), \
+ "1" ((mpi_limb_t)(B2)), \
+ "2" ((mpi_limb_t)(B1)), \
+ "3" ((mpi_limb_t)(B0)), \
"Ir" ((mpi_limb_t)(C3)), \
"Ir" ((mpi_limb_t)(C2)), \
"Ir" ((mpi_limb_t)(C1)), \
"Ir" ((mpi_limb_t)(C0)) \
: "cc")
#define ADD6_LIMB32(A5, A4, A3, A2, A1, A0, B5, B4, B3, B2, B1, B0, \
C5, C4, C3, C2, C1, C0) do { \
mpi_limb_t __carry6_32; \
- __asm__ ("adds %3, %7, %10\n" \
- "adcs %2, %6, %9\n" \
- "adcs %1, %5, %8\n" \
- "adc %0, %4, %4\n" \
+ __asm__ ("adds %3, %3, %10\n" \
+ "adcs %2, %2, %9\n" \
+ "adcs %1, %1, %8\n" \
+ "adc %0, %0, %0\n" \
: "=r" (__carry6_32), \
"=&r" (A2), \
"=&r" (A1), \
"=&r" (A0) \
- : "r" ((mpi_limb_t)(0)), \
- "r" ((mpi_limb_t)(B2)), \
- "r" ((mpi_limb_t)(B1)), \
- "r" ((mpi_limb_t)(B0)), \
+ : "0" ((mpi_limb_t)(0)), \
+ "1" ((mpi_limb_t)(B2)), \
+ "2" ((mpi_limb_t)(B1)), \
+ "3" ((mpi_limb_t)(B0)), \
"Ir" ((mpi_limb_t)(C2)), \
"Ir" ((mpi_limb_t)(C1)), \
"Ir" ((mpi_limb_t)(C0)) \
: "cc"); \
ADD4_LIMB32(A5, A4, A3, __carry6_32, B5, B4, B3, __carry6_32, \
C5, C4, C3, 0xffffffffU); \
} while (0)
#define SUB4_LIMB32(A3, A2, A1, A0, B3, B2, B1, B0, C3, C2, C1, C0) \
- __asm__ ("subs %3, %7, %11\n" \
- "sbcs %2, %6, %10\n" \
- "sbcs %1, %5, %9\n" \
- "sbc %0, %4, %8\n" \
+ __asm__ ("subs %3, %3, %11\n" \
+ "sbcs %2, %2, %10\n" \
+ "sbcs %1, %1, %9\n" \
+ "sbc %0, %0, %8\n" \
: "=r" (A3), \
"=&r" (A2), \
"=&r" (A1), \
"=&r" (A0) \
- : "r" ((mpi_limb_t)(B3)), \
- "r" ((mpi_limb_t)(B2)), \
- "r" ((mpi_limb_t)(B1)), \
- "r" ((mpi_limb_t)(B0)), \
+ : "0" ((mpi_limb_t)(B3)), \
+ "1" ((mpi_limb_t)(B2)), \
+ "2" ((mpi_limb_t)(B1)), \
+ "3" ((mpi_limb_t)(B0)), \
"Ir" ((mpi_limb_t)(C3)), \
"Ir" ((mpi_limb_t)(C2)), \
"Ir" ((mpi_limb_t)(C1)), \
"Ir" ((mpi_limb_t)(C0)) \
: "cc")
#define SUB6_LIMB32(A5, A4, A3, A2, A1, A0, B5, B4, B3, B2, B1, B0, \
C5, C4, C3, C2, C1, C0) do { \
mpi_limb_t __borrow6_32; \
- __asm__ ("subs %3, %7, %10\n" \
- "sbcs %2, %6, %9\n" \
- "sbcs %1, %5, %8\n" \
- "sbc %0, %4, %4\n" \
+ __asm__ ("subs %3, %3, %9\n" \
+ "sbcs %2, %2, %8\n" \
+ "sbcs %1, %1, %7\n" \
+ "sbc %0, %0, %0\n" \
: "=r" (__borrow6_32), \
"=&r" (A2), \
"=&r" (A1), \
"=&r" (A0) \
- : "r" ((mpi_limb_t)(0)), \
- "r" ((mpi_limb_t)(B2)), \
- "r" ((mpi_limb_t)(B1)), \
- "r" ((mpi_limb_t)(B0)), \
+ : "1" ((mpi_limb_t)(B2)), \
+ "2" ((mpi_limb_t)(B1)), \
+ "3" ((mpi_limb_t)(B0)), \
"Ir" ((mpi_limb_t)(C2)), \
"Ir" ((mpi_limb_t)(C1)), \
"Ir" ((mpi_limb_t)(C0)) \
: "cc"); \
SUB4_LIMB32(A5, A4, A3, __borrow6_32, B5, B4, B3, 0, \
C5, C4, C3, -__borrow6_32); \
} while (0)
--
2.43.0