File: montgomery.S

package info (click to toggle)
numerix 0.22-4
  • links: PTS, VCS
  • area: main
  • in suites: lenny
  • size: 4,380 kB
  • ctags: 4,165
  • sloc: asm: 26,210; ansic: 12,168; ml: 4,912; sh: 3,899; pascal: 414; makefile: 179
file content (145 lines) | stat: -rw-r--r-- 4,447 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
// file kernel/n/ppc32/montgomery.S: Montgomery modular exponentiation
/*-----------------------------------------------------------------------+
 |  Copyright 2005-2006, Michel Quercia (michel.quercia@prepas.org)      |
 |                                                                       |
 |  This file is part of Numerix. Numerix is free software; you can      |
 |  redistribute it and/or modify it under the terms of the GNU Lesser   |
 |  General Public License as published by the Free Software Foundation; |
 |  either version 2.1 of the License, or (at your option) any later     |
 |  version.                                                             |
 |                                                                       |
 |  The Numerix Library is distributed in the hope that it will be       |
 |  useful, but WITHOUT ANY WARRANTY; without even the implied warranty  |
 |  of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU  |
 |  Lesser General Public License for more details.                      |
 |                                                                       |
 |  You should have received a copy of the GNU Lesser General Public     |
 |  License along with the GNU MP Library; see the file COPYING. If not, |
 |  write to the Free Software Foundation, Inc., 59 Temple Place -       |
 |  Suite 330, Boston, MA 02111-1307, USA.                               |
 +-----------------------------------------------------------------------+
 |                                                                       |
 |                   Exponentiation modulaire de Montgomery              |
 |                                                                       |
 +-----------------------------------------------------------------------*/

                          ; +----------------------+
                          ; |  Division modulaire  |
                          ; +----------------------+


; void xn(mgdiv_n2)(chiffre *a, chiffre *c, chiffre d, long n)
;
; input:
; a = natural number of length 2n+1
; c = natural number of length n
; d = -1/c mod BASE
;
; constraints:
; n >= 2, a[0..2n-1] <= (BASE^n - 1)^2, a and c distinct (not overlapping)
;
; output:
; a[n..2n-1] <- a[0..2n-1]/BASE^n mod c, not normalized

#ifdef assembly_sn_mgdiv_n2
#undef L
#define L(x) .Lsn_mgdiv_##x
.globl _sn_mgdiv_n2
_sn_mgdiv_n2:

	; Montgomery modular division (prototype and contract are in the
	; comment just above this block).
	;
	; Arguments as they are read below: r3 = a, r4 = c, r5 = d, r6 = n.
	; Digits are 4 bytes wide (n is scaled by 4 to form byte offsets).
	;
	; The routine runs n passes.  Each pass computes q = d*a[0] (low
	; word), jumps into the external unrolled loop Lsn_muladdloop, then
	; folds the running carry into the word just above the window and
	; slides the window up by one digit.  If a carry is left after the
	; last pass, the external unrolled loop Lsn_subloop is entered once.
	; Both external loops are defined outside this block; everything
	; said about them below is an assumption to be confirmed there.
	;
	; NOTE(review): r2 is used as scratch and is not saved.  The
	; leading-underscore symbol and lo16/ha16 operators suggest a
	; Darwin/Mach-O target - confirm r2 is volatile in the ABI in use.

	; local variables (register roles)
	;   a    (r12) : sliding pointer into a, starts at &a[n]
	;   c    (r11) : pointer to &c[n]
	;   d    (r7)  : copy of the argument d
	;   n    (r6)  : digit count, used in place
	;   la   (r8)  : outer loop counter, starts at n
	;   madd (r10) : computed entry address inside the unrolled loop
	;   x    (r3)  : scratch, also carries a value across the blrl
	;   y    (r4)  : scratch
	;   q    (r9)  : per-pass multiplier
	;   r    (r2)  : carry (0 or 1) pending on the word above the window
	#define _a_     r12
	#define _c_     r11
	#define _d_     r7
	#define _n_     r6
	#define _la_    r8
	#define _madd_  r10
	#define _x_     r3
	#define _y_     r4
	#define _q_     r9
	#define _r_     r2

	; initialize the registers
	; r0 keeps the caller return address; LR is overwritten several
	; times below and restored from r0 at the done label.
	mflr    r0
	slwi    r2,   _n_,  2
	add     _a_,  r3,   r2	; a <- &a[n]
	add     _c_,  r4,   r2	; c <- &c[n]
	mr      _d_,  r5
	mr      _la_, _n_

	; prepare the entry point into the unrolled inner loop
	; bcl 20,31 to the next instruction only serves to load LR with the
	; current address, so the target is computed position-independently.
	bcl    20,31, L(here)
L(here):
	mflr   _madd_
	neg    r2,  _n_
        clrlslwi r2, r2,   27,2 ; r2 <- 4*((-n) % 32)
	; r2 <- 36*((-n) % 32); presumably each unrolled step of
	; Lsn_muladdloop is 9 instructions (36 bytes) - confirm there.
	mulli  r2,  r2,  9
	add    _madd_, _madd_, r2
	; madd <- Lsn_muladdloop + 36*((-n) % 32)
	addi   _madd_, _madd_, lo16(Lsn_muladdloop - L(here))
	; NOTE(review): the high half of the displacement is left out, so
	; this only works while the displacement fits in 16 signed bits.
/*	addis  _madd_, _madd_, ha16(Lsn_muladdloop - L(here)) */

	; a <- a - c*(a/c mod BASE^n)
	li     _r_,  0		; initial carry on a[n]
L(loop):
	slwi   _x_,  _n_,  2
	subf   _x_,  _x_,  _a_
	lwz    _x_,  0(_x_)	; x <- a[0] (lowest digit of the current window)
	mullw  _q_,  _x_,  _d_	; q <- d*a[0] mod BASE (mullw keeps the low word)
	mtlr   _madd_
	addi   _x_,  _n_,  31
	srwi   _x_,  _x_,  5
	mtctr  _x_		; ctr <- ceil(n/32)
	slwi   _x_,  _x_,  7	; x <- 128*ceil(n/32) bytes, i.e. 32 digits per count
	subf   _a_,  _x_,  _a_	; re-bias a and c
	subf   _c_,  _x_,  _c_
	li     _x_,  0		; presumably the initial carry word for the loop
	; Call into the unrolled loop at the computed entry; it returns to
	; the next instruction through LR.  Assumed on return: a and c are
	; back at the top of the window and x holds the outgoing carry word
	; - confirm in Lsn_muladdloop.
	blrl
	subic  _r_,  _r_,  1	; add the carry onto a[n]: CA <- 1 iff r was nonzero
	lwz    _y_,  0(_a_)
	adde   _x_,  _x_, _y_	; x <- x + a[n] + CA
	stw    _x_,  0(_a_)
	li     _r_,  0
	addze  _r_,  _r_	; r <- carry out of the addition above
	subic. _la_, _la_, 1	; sets CR0 for the bne; addi below leaves CR0 alone
	addi   _a_,  _a_,  4	; slide the window up by one digit
	bne    L(loop)

	; if there is a carry, subtract c
	; r-1 is zero only when r was 1, so bne skips the subtraction when
	; no carry is pending.  When r was 1 this subic. also leaves CA set,
	; and no instruction before the blrl below changes CA; presumably
	; that is the initial no-borrow state for Lsn_subloop - confirm.
	subic.  _r_,  _r_,  1
	bne    L(done)
	bcl    20,31, L(there)	; same trick as above: LR <- current address
L(there):
	mflr   _madd_
	neg    r2,  _n_
        clrlslwi r2, r2,   27,4 ; r2 <- 16*((-n) % 32)
	; presumably 4 instructions (16 bytes) per unrolled step of
	; Lsn_subloop - confirm there.
	add    _madd_, _madd_, r2
	; madd <- Lsn_subloop + 16*((-n) % 32)
	addi   _madd_, _madd_, lo16(Lsn_subloop - L(there))
	; NOTE(review): same 16-bit displacement assumption as above.
/*	addis  _madd_, _madd_, ha16(Lsn_subloop - L(there)) */
	mtlr   _madd_
	addi   _x_,  _n_,  31
	srwi   _x_,  _x_,  5
	mtctr  _x_		; ctr <- ceil(n/32)
	slwi   _x_,  _x_,  7
	subf   _a_,  _x_,  _a_	; re-bias a and c
	subf   _c_,  _x_,  _c_
	; r10 (the madd register, free again after mtlr) receives a copy of
	; a; presumably the destination pointer expected by Lsn_subloop.
	mr     r10,  _a_
	blrl

	; finished
L(done):
	mtlr   r0		; restore the caller return address saved at entry
	blr

	#undef _a_
	#undef _c_
	#undef _d_
	#undef _n_
	#undef _la_
	#undef _madd_
	#undef _x_
	#undef _y_
	#undef _q_
	#undef _r_

#undef L
#endif /* assembly_sn_mgdiv_n2 */