/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
/* This inline assembly version is for the 32-bit ARM platform only */
#if !defined(__arm__)
#error "This is for ARM only"
#endif
/* The inline assembler version does not work for 16-bit Thumb (Thumb-1),
 * so require ARM mode or Thumb-2; ARMv3 is also excluded because it lacks
 * the UMLAL instruction. */
#if (!defined(__thumb__) || defined(__thumb2__)) && !defined(__ARM_ARCH_3__)
#include "mpi-priv.h"
#ifdef MP_ASSEMBLY_MULTIPLY
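/*
 * s_mpv_mul_d: compute c = a * b, where a is an a_len-digit number and b
 * is a single digit; a_len + 1 digits are written to c. As a rough
 * portable C sketch of the same loop (not compiled here; it assumes the
 * mp_word, ACCUM and CARRYOUT definitions from mpi-priv.h):
 *
 *     mp_digit d = 0;
 *     while (a_len--) {
 *         mp_word w = (mp_word)b * *a++ + d;
 *         *c++ = ACCUM(w);     // low digit of the product
 *         d = CARRYOUT(w);     // high digit becomes the next carry
 *     }
 *     *c = d;                  // final carry-out digit
 */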
void s_mpv_mul_d(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
{
    __asm__ __volatile__(
        "mov r5, #0\n"           /* carry = 0 */
#ifdef __thumb2__
        "cbz %1, 2f\n"           /* skip the loop entirely if a_len == 0 */
#else
        "cmp %1, r5\n" /* r5 is 0 now */
        "beq 2f\n"
#endif
        "1:\n"
        "mov r4, #0\n"           /* clear the high half of the accumulator */
        "ldr r6, [%0], #4\n"     /* r6 = *a++ */
        "umlal r5, r4, r6, %2\n" /* r4:r5 = r6 * b + carry */
        "str r5, [%3], #4\n"     /* *c++ = low half */
        "mov r5, r4\n"           /* carry = high half */
        "subs %1, #1\n"          /* --a_len */
        "bne 1b\n"
        "2:\n"
        "str r5, [%3]\n"         /* store the final carry digit */
        :
        : "r"(a), "r"(a_len), "r"(b), "r"(c)
        : "memory", "cc", "%r4", "%r5", "%r6");
}
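/*
 * s_mpv_mul_d_add: compute c += a * b for an a_len-digit a and a single
 * digit b. It differs from s_mpv_mul_d only in that each existing digit
 * of c is added into the product; the final carry is stored in c[a_len]
 * without further propagation.
 */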
void s_mpv_mul_d_add(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
{
    __asm__ __volatile__(
        "mov r5, #0\n"           /* carry = 0 */
#ifdef __thumb2__
        "cbz %1, 2f\n"           /* skip the loop entirely if a_len == 0 */
#else
        "cmp %1, r5\n" /* r5 is 0 now */
        "beq 2f\n"
#endif
        "1:\n"
        "mov r4, #0\n"
        "ldr r6, [%3]\n"         /* r6 = *c */
        "adds r5, r6\n"          /* r5 = carry + *c, carry-out in C */
        "adc r4, r4, #0\n"       /* fold the carry-out into the high half */
        "ldr r6, [%0], #4\n"     /* r6 = *a++ */
        "umlal r5, r4, r6, %2\n" /* r4:r5 += r6 * b */
        "str r5, [%3], #4\n"     /* *c++ = low half */
        "mov r5, r4\n"           /* carry = high half */
        "subs %1, #1\n"          /* --a_len */
        "bne 1b\n"
        "2:\n"
        "str r5, [%3]\n"         /* store the final carry digit */
        :
        : "r"(a), "r"(a_len), "r"(b), "r"(c)
        : "memory", "cc", "%r4", "%r5", "%r6");
}
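/*
 * s_mpv_mul_d_add_prop: like s_mpv_mul_d_add, but instead of storing the
 * final carry it propagates it through the higher digits of c until it
 * is absorbed (the caller must ensure c has enough digits for the carry
 * to stop). A sketch of the propagation tail in portable C (not compiled
 * here; same mpi-priv.h assumptions as above):
 *
 *     while (d) {              // d is the carry left by the main loop
 *         mp_word w = (mp_word)*c + d;
 *         *c++ = ACCUM(w);
 *         d = CARRYOUT(w);
 *     }
 */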
void s_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
{
    if (!a_len)
        return;
    __asm__ __volatile__(
        "mov r5, #0\n"           /* carry = 0 */
        "1:\n"
        "mov r4, #0\n"
        "ldr r6, [%3]\n"         /* r6 = *c */
        "adds r5, r6\n"          /* r5 = carry + *c, carry-out in C */
        "adc r4, r4, #0\n"       /* fold the carry-out into the high half */
        "ldr r6, [%0], #4\n"     /* r6 = *a++ */
        "umlal r5, r4, r6, %2\n" /* r4:r5 += r6 * b */
        "str r5, [%3], #4\n"     /* *c++ = low half */
        "mov r5, r4\n"           /* carry = high half */
        "subs %1, #1\n"          /* --a_len */
        "bne 1b\n"
#ifdef __thumb2__
        "cbz r4, 3f\n"           /* done if no carry is left */
#else
        "cmp r4, #0\n"
        "beq 3f\n"
#endif
        "2:\n"                   /* ripple the carry through c */
        "mov r4, #0\n"
        "ldr r6, [%3]\n"
        "adds r5, r6\n"          /* r5 = *c + carry */
        "adc r4, r4, #0\n"
        "str r5, [%3], #4\n"
        "movs r5, r4\n"          /* loop while the carry persists */
        "bne 2b\n"
        "3:\n"
        :
        : "r"(a), "r"(a_len), "r"(b), "r"(c)
        : "memory", "cc", "%r4", "%r5", "%r6");
}
#endif
#ifdef MP_ASSEMBLY_SQUARE
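/*
 * s_mpv_sqr_add_prop: add the square of each digit of pa into ps at its
 * double-width position, i.e. ps[2i..2i+1] += pa[i] * pa[i], propagating
 * the carry through the higher digits of ps. A rough portable C sketch
 * (not compiled here; it assumes the mp_word, DIGIT_BIT, ACCUM and
 * CARRYOUT definitions from mpi-priv.h):
 *
 *     mp_digit carry = 0;
 *     while (a_len--) {
 *         mp_digit d = *pa++;
 *         mp_word w = (mp_word)d * d + *ps + carry;
 *         *ps++ = ACCUM(w);                  // low half
 *         w = (w >> DIGIT_BIT) + *ps;
 *         *ps++ = ACCUM(w);                  // high half
 *         carry = CARRYOUT(w);
 *     }
 *     while (carry) {                        // ripple the leftover carry
 *         mp_word w = (mp_word)*ps + carry;
 *         *ps++ = ACCUM(w);
 *         carry = CARRYOUT(w);
 *     }
 */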
void s_mpv_sqr_add_prop(const mp_digit *pa, mp_size a_len, mp_digit *ps)
{
    if (!a_len)
        return;
    __asm__ __volatile__(
        "mov r3, #0\n"           /* carry = 0 */
        "1:\n"
        "mov r4, #0\n"
        "ldr r6, [%0], #4\n"     /* r6 = *pa++ */
        "ldr r5, [%2]\n"         /* r5 = ps[0] */
        "adds r3, r5\n"          /* r3 = carry + ps[0], carry-out in C */
        "adc r4, r4, #0\n"       /* fold the carry-out into the high half */
        "umlal r3, r4, r6, r6\n" /* w = r3:r4 = r6 * r6 + carry + ps[0] */
        "str r3, [%2], #4\n"     /* ps[0] = low half; advance ps */
        "ldr r5, [%2]\n"         /* r5 = ps[1] */
        "adds r3, r4, r5\n"      /* r3 = high half + ps[1] */
        "mov r4, #0\n"
        "adc r4, r4, #0\n"       /* capture the carry-out */
        "str r3, [%2], #4\n"     /* ps[1] = r3; advance ps */
        "mov r3, r4\n"           /* carry for the next digit */
        "subs %1, #1\n"          /* --a_len */
        "bne 1b\n"
#ifdef __thumb2__
        "cbz r3, 3f\n"           /* done if no carry is left */
#else
        "cmp r3, #0\n"
        "beq 3f\n"
#endif
        "2:\n"                   /* ripple the carry through ps */
        "mov r4, #0\n"
        "ldr r5, [%2]\n"
        "adds r3, r5\n"          /* r3 = *ps + carry */
        "adc r4, r4, #0\n"
        "str r3, [%2], #4\n"
        "movs r3, r4\n"          /* loop while the carry persists */
        "bne 2b\n"
        "3:"
        :
        : "r"(pa), "r"(a_len), "r"(ps)
        : "memory", "cc", "%r3", "%r4", "%r5", "%r6");
}
#endif
#endif