File: montgomery.S

package info (click to toggle)
numerix 0.22-4
links: PTS, VCS
area: main
in suites: lenny
size: 4,380 kB
ctags: 4,165
sloc: asm: 26,210; ansic: 12,168; ml: 4,912; sh: 3,899; pascal: 414; makefile: 179
file content (145 lines) | stat: -rw-r--r-- 4,447 bytes
parent folder | download | duplicates (2)
// file kernel/n/ppc32/montgomery.S: Montgomery modular exponentiation
/*-----------------------------------------------------------------------+
 |  Copyright 2005-2006, Michel Quercia (michel.quercia@prepas.org)      |
 |                                                                       |
 |  This file is part of Numerix. Numerix is free software; you can      |
 |  redistribute it and/or modify it under the terms of the GNU Lesser   |
 |  General Public License as published by the Free Software Foundation; |
 |  either version 2.1 of the License, or (at your option) any later     |
 |  version.                                                             |
 |                                                                       |
 |  The Numerix Library is distributed in the hope that it will be       |
 |  useful, but WITHOUT ANY WARRANTY; without even the implied warranty  |
 |  of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU  |
 |  Lesser General Public License for more details.                      |
 |                                                                       |
 |  You should have received a copy of the GNU Lesser General Public     |
 |  License along with the GNU MP Library; see the file COPYING. If not, |
 |  write to the Free Software Foundation, Inc., 59 Temple Place -       |
 |  Suite 330, Boston, MA 02111-1307, USA.                               |
 +-----------------------------------------------------------------------+
 |                                                                       |
 |                   Exponentiation modulaire de Montgomery              |
 |                                                                       |
 +-----------------------------------------------------------------------*/

                          ; +----------------------+
                          ; |   Modular division   |
                          ; +----------------------+


; void xn(mgdiv_n2)(chiffre *a, chiffre *c, chiffre d, long n)
;
; Montgomery-style modular division: divides a by BASE^n modulo c, in place.
;
; input:
; a = natural number of length 2n+1
; c = natural number of length n
; d = -1/c mod BASE
;
; constraints:
; n >= 2, a[0..2n-1] <= (BASE^n - 1)^2, a and c must be distinct
;
; output:
; a[n..2n-1] <- a[0..2n-1]/BASE^n mod c, not normalized
;
; argument registers, as read by the code below:
; r3 = a, r4 = c, r5 = d, r6 = n
; digits are accessed as 4-byte words (lwz/stw, indices scaled by 4)

#ifdef assembly_sn_mgdiv_n2
#undef L
#define L(x) .Lsn_mgdiv_##x
.globl _sn_mgdiv_n2
_sn_mgdiv_n2:

	; local variables (register aliases)
	;   _a_    moving pointer into a, starts at &a[n], +4 per outer pass
	;   _c_    pointer into c, starts at &c[n]
	;   _d_    copy of d
	;   _n_    n, left in its argument register
	;   _la_   outer loop counter, counts n down to 0
	;   _madd_ computed entry address into the unrolled loop
	;   _x_    scratch word (aliases argument register r3)
	;   _y_    scratch word (aliases argument register r4)
	;   _q_    multiplier digit for the current pass
	;   _r_    carry saved between outer passes (r2 is also used as
	;          plain scratch during the setup code)
	; r3 and r4 are consumed into _a_ and _c_ before _x_ and _y_ are written
	#define _a_     r12
	#define _c_     r11
	#define _d_     r7
	#define _n_     r6
	#define _la_    r8
	#define _madd_  r10
	#define _x_     r3
	#define _y_     r4
	#define _q_     r9
	#define _r_     r2

	; initialize the registers
	; r0 keeps the caller return address, since bcl and blrl below
	; overwrite LR; it is restored at the done label
	mflr    r0
	; r2 <- 4*n, the byte size of n words
	slwi    r2,   _n_,  2
	add     _a_,  r3,   r2	; a <- &a[n]
	add     _c_,  r4,   r2	; c <- &c[n]
	mr      _d_,  r5
	mr      _la_, _n_

	; prepare the entry point into the unrolled inner loop:
	;   _madd_ <- Lsn_muladdloop + 36*((-n) mod 32)
	; the bcl to the very next instruction only serves to read the current
	; code address into LR, so the target is computed relative to it
	; NOTE(review): presumably each unrolled step of Lsn_muladdloop is
	; 9 instructions (36 bytes) with 32 steps per counted pass; that loop
	; is defined elsewhere, verify there
	bcl    20,31, L(here)
L(here):
	mflr   _madd_
	neg    r2,  _n_
        clrlslwi r2, r2,   27,2 ; r2 <- 4*((-n) % 32)
	mulli  r2,  r2,  9
	add    _madd_, _madd_, r2
	; only the low 16 bits of the label distance are added; the high-half
	; fixup below is disabled
	; NOTE(review): this assumes the distance fits in a signed 16-bit
	; immediate - confirm against the final code layout
	addi   _madd_, _madd_, lo16(Lsn_muladdloop - L(here))
/*	addis  _madd_, _madd_, ha16(Lsn_muladdloop - L(here)) */

	; a <- a - c*(a/c mod BASE^n)
	; one outer pass per word, n passes in total
	li     _r_,  0		; carry into a[n] starts at 0
L(loop):
	; x <- word located n words below the current a pointer
	slwi   _x_,  _n_,  2
	subf   _x_,  _x_,  _a_
	lwz    _x_,  0(_x_)	; x <- a[0]
	mullw  _q_,  _x_,  _d_	; q <- d*a[0] mod BASE
	mtlr   _madd_
	; ctr <- ceil(n/32) = (n+31) >> 5
	addi   _x_,  _n_,  31
	srwi   _x_,  _x_,  5
	mtctr  _x_		; ctr <- ceil(n/32)
	; x <- 128*ceil(n/32) bytes, i.e. 32 words per counted pass
	slwi   _x_,  _x_,  7
	subf   _a_,  _x_,  _a_	; re-bias a and c
	subf   _c_,  _x_,  _c_
	li     _x_,  0
	; call into the unrolled loop at the computed entry; blrl sets LR to
	; the next instruction so the loop can return here
	; NOTE(review): the code after the call treats _a_ as pointing at the
	; carry word again and _x_ as the carry-out word; this relies on the
	; behaviour of Lsn_muladdloop, which is not visible here - verify
	blrl
	; subic sets CA exactly when the saved carry r is non-zero
	subic  _r_,  _r_,  1	; add the carry into a[n]
	lwz    _y_,  0(_a_)
	adde   _x_,  _x_, _y_
	stw    _x_,  0(_a_)
	; r <- carry out of the addition above (0 or 1)
	li     _r_,  0
	addze  _r_,  _r_
	; decrement the pass counter and set CR0; the addi in between does not
	; touch CR0, so bne still tests the counter
	subic. _la_, _la_, 1
	addi   _a_,  _a_,  4
	bne    L(loop)

	; if there is a carry, subtract c
	; CR0 is eq only when r was 1; in that case CA is also left set
	subic.  _r_,  _r_,  1
	bne    L(done)
	; same entry-address computation as above, for the subtraction loop:
	;   _madd_ <- Lsn_subloop + 16*((-n) mod 32)
	; NOTE(review): presumably each unrolled step of Lsn_subloop is
	; 4 instructions (16 bytes); that loop is defined elsewhere, verify
	bcl    20,31, L(there)
L(there):
	mflr   _madd_
	neg    r2,  _n_
        clrlslwi r2, r2,   27,4 ; r2 <- 16*((-n) % 32)
	add    _madd_, _madd_, r2
	; low 16 bits only, same caveat as for the first computed entry
	addi   _madd_, _madd_, lo16(Lsn_subloop - L(there))
/*	addis  _madd_, _madd_, ha16(Lsn_subloop - L(there)) */
	mtlr   _madd_
	addi   _x_,  _n_,  31
	srwi   _x_,  _x_,  5
	mtctr  _x_		; ctr <- ceil(n/32)
	slwi   _x_,  _x_,  7
	subf   _a_,  _x_,  _a_	; re-bias a and c
	subf   _c_,  _x_,  _c_
	; r10 is the register behind _madd_; its value is already in LR, so it
	; is reused to hold a copy of the a pointer
	; NOTE(review): presumably r10 is the destination pointer expected by
	; Lsn_subloop, and that loop starts from the CA value left by the
	; subic. above (no instruction in between alters CA) - verify
	mr     r10,  _a_
	blrl

	; done
L(done):
	; restore the return address saved at entry and return
	mtlr   r0
	blr

	; drop the register aliases
	#undef _a_
	#undef _c_
	#undef _d_
	#undef _n_
	#undef _la_
	#undef _madd_
	#undef _x_
	#undef _y_
	#undef _q_
	#undef _r_

#undef L
#endif /* assembly_sn_mgdiv_n2 */