1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122
|
dnl ******************************************************************************
dnl Copyright 2009 Paul Zimmermann and Alexander Kruppa.
dnl
dnl This file is part of the ECM Library.
dnl
dnl The ECM Library is free software; you can redistribute it and/or modify
dnl it under the terms of the GNU Lesser General Public License as published by
dnl the Free Software Foundation; either version 3 of the License, or (at your
dnl option) any later version.
dnl
dnl The ECM Library is distributed in the hope that it will be useful, but
dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
dnl License for more details.
dnl
dnl You should have received a copy of the GNU Lesser General Public License
dnl along with the ECM Library; see the file COPYING.LIB. If not, write to
dnl the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston,
dnl MA 02110-1301, USA.
dnl ******************************************************************************
dnl C is the comment marker used in the rest of this file. It expands to a
dnl newline followed by dnl, so m4 ends the current output line and then
dnl discards everything up to the end of the input line.
define(C, `
dnl')
C mp_limb_t mulredc2(mp_limb_t * z, const mp_limb_t * x, const mp_limb_t * y,
C                    const mp_limb_t *m, mp_limb_t inv_m);
C
C Two-limb multiply-and-reduce with 64-bit limbs. For each limb x[i] the
C running sum tmp is updated as
C     tmp = (tmp + x[i]*y + U*m) / 2^64,   U = (low limb of tmp + x[i]*y) * inv_m
C so that the low limb cancels exactly. After both rows the two low limbs of
C tmp are stored to z[0], z[1] and the remaining top limb is returned.
C
C arguments:
C r3 = ptr to result z least significant limb
C r4 = ptr to input x least significant limb
C r5 = ptr to input y least significant limb
C r6 = ptr to modulus m least significant limb
C r7 = -1/m mod 2^64
C
C final carry returned in r3
C
C register roles:
C r0        current multiplier limb (y[j] or m[j])
C r8, r9    low / high half of the current 64x64 product
C r10       CY, carry out of the running sum
C r11       U, the reduction multiplier of the current row
C r12       XI, the current limb x[i]
C r17, r14  T0, T1, the two-limb running sum
C r15       tmp[len], top carry handed from row 0 to row 1
C r16       constant zero, used with addze to capture the carry bit
C
C r14-r17 are nonvolatile. This is a leaf function, so they are saved at
C fixed offsets in the protected zone below the stack pointer and r1 is never
C modified. r13 is deliberately not used as a scratch register because the
C 64-bit PowerPC ELF ABI reserves it as the thread pointer.
C All loads from x, y and m happen before the two stores to z.
include(`config.m4')
        GLOBL GSYM_PREFIX`'mulredc2
        GLOBL .GSYM_PREFIX`'mulredc2
        .section ".opd", "aw"
        .align  3
GSYM_PREFIX`'mulredc2:
        .quad   .GSYM_PREFIX`'mulredc2, .TOC.@tocbase, 0
        .size   GSYM_PREFIX`'mulredc2, 24

        TEXT
        .align  5               C powerPC 32 byte alignment
.GSYM_PREFIX`'mulredc2:

C row 0: tmp = (x[0]*y + U*m) / 2^64
C The register saves are interleaved with the multiplies.
        ld      r12, 0(r4)      C XI = x[0]
        ld      r0, 0(r5)       C y[0]
        std     r17, -8(r1)     C save r17 below the stack pointer
        mulld   r8, r0, r12     C x[0]*y[0] low half
        std     r14, -16(r1)    C save r14
        mulhdu  r9, r0, r12     C x[0]*y[0] high half
        ld      r0, 0(r6)       C m[0]
        mulld   r11, r7, r8     C U = T0*invm mod 2^64
        std     r15, -24(r1)    C save r15
        mulld   r17, r0, r11    C T0 = U*m[0] low
        std     r16, -32(r1)    C save r16
        li      r16, 0          C set r16 to zero for carry propagation
        mulhdu  r14, r0, r11    C T1 = U*m[0] high
        ld      r0, 8(r5)       C y[1]
        addc    r8, r8, r17     C result zero, only the carry out is used
        mulld   r8, r0, r12     C x[0]*y[1] low half
        adde    r17, r9, r14    C T0 = initial tmp(0)
        addze   r10, r16        C carry to CY
        mulhdu  r9, r0, r12     C x[0]*y[1] high half
        ld      r0, 8(r6)       C m[1]
        addc    r17, r8, r17    C add low word to T0
        mulld   r8, r0, r11     C U*m[1] low
        adde    r14, r9, r10    C add high word with carry + CY to T1
        C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry!
        mulhdu  r9, r0, r11     C U*m[1] high

C row 1: tmp = (tmp + x[1]*y + U*m) / 2^64
C The tail of row 0 (adding U*m[1]) overlaps the first multiplies of row 1.
        ld      r12, 8(r4)      C XI = x[1]
        ld      r0, 0(r5)       C y[0]
        addc    r17, r8, r17    C add T0 and low word
        mulld   r8, r0, r12     C x[1]*y[0] low half
        adde    r14, r9, r14    C add high word with carry to T1
        addze   r15, r16        C put carry in r15 (tmp[len] <= 1)
        mulhdu  r9, r0, r12     C x[1]*y[0] high half
        addc    r17, r8, r17    C T0
        ld      r0, 0(r6)       C m[0]
        mulld   r11, r7, r17    C U = T0*invm mod 2^64
        adde    r14, r9, r14    C T1
        mulld   r8, r0, r11     C U*m[0] low
        addze   r10, r16        C CY
        mulhdu  r9, r0, r11     C T1 = U*m[0] high
        ld      r0, 8(r5)       C y[1]
        addc    r8, r8, r17     C result = 0, only the carry out is used
        adde    r17, r9, r14    C T0, carry pending
        mulld   r8, r0, r12     C x[1]*y[1] low half
        adde    r14, r15, r10   C T1 = tmp[len] + CY + pending carry
        C since tmp[len] <= 1, T1 <= 3 and carry is zero
        mulhdu  r9, r0, r12     C x[1]*y[1] high half
        ld      r0, 8(r6)       C m[1]
        addc    r17, r8, r17    C add low word to T0
        mulld   r8, r0, r11     C U*m[1] low
        adde    r14, r9, r14    C add high to T1
        addze   r10, r16        C CY
        mulhdu  r9, r0, r11     C U*m[1] high
        addc    r8, r8, r17     C add T0 and low word
        adde    r9, r9, r14     C T1, carry pending

C store the result and return the top limb
        std     r8, 0(r3)       C z[0]
        std     r9, 8(r3)       C z[1]
        addze   r3, r10         C return tmp(len) = CY + pending carry

C restore the nonvolatile registers, r1 was never changed
        ld      r16, -32(r1)
        ld      r15, -24(r1)
        ld      r14, -16(r1)
        ld      r17, -8(r1)
        blr
        .size   .GSYM_PREFIX`'mulredc2, .-.GSYM_PREFIX`'mulredc2
|