File: mulredc2.asm

package info (click to toggle)
gmp-ecm 7.0.6%2Bds-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 3,816 kB
  • sloc: ansic: 41,655; asm: 26,855; sh: 964; xml: 628; python: 493; makefile: 322
file content (122 lines) | stat: -rw-r--r-- 4,626 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
dnl ******************************************************************************
dnl   Copyright 2009 Paul Zimmermann and Alexander Kruppa.
dnl 
dnl   This file is part of the ECM Library.
dnl 
dnl   The ECM Library is free software; you can redistribute it and/or modify
dnl   it under the terms of the GNU Lesser General Public License as published by
dnl   the Free Software Foundation; either version 3 of the License, or (at your
dnl   option) any later version.
dnl 
dnl   The ECM Library is distributed in the hope that it will be useful, but
dnl   WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
dnl   License for more details.
dnl 
dnl   You should have received a copy of the GNU Lesser General Public License
dnl   along with the ECM Library; see the file COPYING.LIB.  If not, write to
dnl   the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston,
dnl   MA 02110-1301, USA.
dnl ******************************************************************************

dnl   Comment macro used throughout this file.  C expands to a newline
dnl   followed by dnl, so everything after a C token up to the end of that
dnl   source line is discarded by m4 and never reaches the assembler.
define(C, `
dnl')

C mp_limb_t mulredc2(mp_limb_t * z, const mp_limb_t * x, const mp_limb_t * y,
C                 const mp_limb_t *m, mp_limb_t inv_m);
C
C arguments:
C r3 = ptr to result z least significant limb
C r4 = ptr to input x least significant limb
C r5 = ptr to input y least significant limb
C r6 = ptr to modulus m least significant limb
C r7 = inv_m = -1/m mod 2^64 (depends only on the least significant limb m[0])
C
C final carry returned in r3



include(`config.m4')

	C Export two symbols: the plain name (the function descriptor defined
	C below) and the dot-prefixed name (the code entry point defined in the
	C text section).  GLOBL and GSYM_PREFIX are not defined in this file;
	C presumably they come from config.m4 - confirm there.
	GLOBL GSYM_PREFIX`'mulredc2
	GLOBL .GSYM_PREFIX`'mulredc2

	C Function descriptor, placed in the writable ".opd" section and aligned
	C to 8 bytes.  It holds three 8-byte words (24 bytes, matching .size):
	C the address of the code entry point, the TOC base, and a zero word.
	.section ".opd", "aw"
	.align	3
GSYM_PREFIX`'mulredc2:
	.quad	.GSYM_PREFIX`'mulredc2, .TOC.@tocbase, 0
	.size	GSYM_PREFIX`'mulredc2, 24

	TEXT
C ----------------------------------------------------------------------
C Code entry point of mulredc2 (leaf routine, makes no calls).
C
C Two-limb multiplication with interleaved Montgomery reduction.  For
C i = 0, 1 the routine adds x[i]*y and U*m to a three-limb accumulator,
C where U = T0*inv_m mod 2^64 is chosen so that the lowest limb becomes
C zero, and then drops that zero limb.  The two low limbs of the final
C accumulator are stored to z[0], z[1]; the top limb (carry) is returned
C in r3.  No subtraction of m is performed here.
C All loads from x, y and m are issued before the stores to z.
C
C Register roles:
C   r3 = z, r4 = x, r5 = y, r6 = m, r7 = inv_m     (arguments)
C   r0  = current y[j] or m[j] limb
C   r12 = XI, the current x[i] limb
C   r11 = U, the reduction multiplier of the current pass
C   r8, r9 = low / high half of the current 64x64 product
C   r17 = T0, r14 = T1, r15 = tmp[len]             (accumulator limbs)
C   r10 = CY, carry collected between passes
C   r16 = constant zero, used by addze to materialise the carry bit
C
C Stack and ABI notes:
C   r14-r17 are nonvolatile and are saved to / restored from their
C   conventional save slots just below the stack pointer (r14 at -144,
C   r15 at -136, r16 at -128, r17 at -120).  r1 itself is never changed,
C   so the back chain and the 16-byte stack alignment stay intact; this
C   relies on the protected zone below r1 that the 64-bit PowerPC ELF ABI
C   grants to leaf functions.
C   r13 is deliberately not used: that ABI reserves it as the thread
C   pointer, so it must hold its value even if a signal interrupts us.
C ----------------------------------------------------------------------
	.align	5	C powerPC 32 byte alignment
.GSYM_PREFIX`'mulredc2:

C Pass i = 0, j = 0: form x[0]*y[0], derive U, cancel the low limb.
		ld      r12, 0(r4)          C XI = x[0]
		ld      r0, 0(r5)           C y[0]
		std     r17, -120(r1)       C save r17 below r1 (r1 unchanged)
		mulld   r8, r0, r12         C x[0]*y[0] low half
		std     r14, -144(r1)       C save r14
		mulhdu  r9, r0, r12         C x[0]*y[0] high half
		ld      r0, 0(r6)           C m[0]
		mulld   r11, r7, r8         C U = T0*invm mod 2^64
		std     r15, -136(r1)       C save r15
		mulld   r17, r0, r11        C T0 = U*m[0] low
		std     r16, -128(r1)       C save r16
		li      r16, 0              C set r16 to zero for carry propagation
		mulhdu  r14, r0, r11        C T1 = U*m[0] high
		ld      r0, 8(r5)           C y[1]
		addc    r8, r8, r17         C result zero, only the carry is kept
		mulld   r8, r0, r12         C x[0]*y[1] low half
		adde    r17, r9, r14        C T0 = initial tmp(0)
		addze   r10, r16            C carry to CY

C Pass i = 0, j = 1: add x[0]*y[1] and U*m[1].
		mulhdu  r9, r0, r12         C x[0]*y[1] high half
		ld      r0, 8(r6)           C m[1]
		addc    r17, r8, r17        C add low word to T0
		mulld   r8, r0, r11         C U*m[1] low
		adde    r14, r9, r10        C add high word with carry + CY to T1
		C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry!

C Pass i = 1, j = 0: finish pass 0, fetch x[1], derive the new U.
		mulhdu  r9, r0, r11         C U*m[1] high
		ld      r12, 8(r4)          C XI = x[1] (r4 is not needed afterwards)
		ld      r0, 0(r5)           C y[0]
		addc    r17, r8, r17        C add T0 and low word
		mulld   r8, r0, r12         C x[1]*y[0] low half
		adde    r14, r9, r14        C add high word with carry to T1
		addze   r15, r16            C put carry in r15 (tmp[len] <= 1)
		mulhdu  r9, r0, r12         C x[1]*y[0] high half
		addc    r17, r8, r17        C T0
		ld      r0, 0(r6)           C m[0]
		mulld   r11, r7, r17        C U = T0*invm mod 2^64
		adde    r14, r9, r14        C T1
		mulld   r8, r0, r11         C U*m[0] low
		addze   r10, r16            C CY
		mulhdu  r9, r0, r11         C T1 = U*m[0] high
		ld      r0, 8(r5)           C y[1]
		addc    r8, r8, r17         C result = 0, only the carry is kept
		adde    r17, r9, r14        C T0, carry pending

C Pass i = 1, j = 1: add x[1]*y[1] and U*m[1], then store the result.
		mulld   r8, r0, r12         C x[1]*y[1] low half
		adde    r14, r15, r10       C T1 = tmp[len] + CY + pending carry
		C since tmp[len] <= 1, T1 <= 3 and carry is zero
		mulhdu  r9, r0, r12         C x[1]*y[1] high half
		ld      r0, 8(r6)           C m[1]
		addc    r17, r8, r17        C add low word to T0
		mulld   r8, r0, r11         C U*m[1] low
		adde    r14, r9, r14        C add high to T1
		addze   r10, r16            C CY
		mulhdu  r9, r0, r11         C U*m[1] high
		addc    r8, r8, r17         C add T0 and low word
		adde    r9, r9, r14         C T1, carry pending
		std     r8, 0(r3)           C z[0]
		std     r9, 8(r3)           C z[1] (stores leave the carry bit alone)

		addze   r3, r10             C return tmp(len) = CY + pending carry
		ld      r14, -144(r1)       C restore the nonvolatile registers
		ld      r15, -136(r1)
		ld      r16, -128(r1)
		ld      r17, -120(r1)
		blr

	.size	.GSYM_PREFIX`'mulredc2, .-.GSYM_PREFIX`'mulredc2