File: shift.S

package info (click to toggle)
numerix 0.22-3
links: PTS
area: main
in suites: etch, etch-m68k
size: 4,380 kB
ctags: 4,165
sloc: asm: 26,210; ansic: 12,168; ml: 4,912; sh: 3,899; pascal: 414; makefile: 179
file content (207 lines) | stat: -rw-r--r-- 6,636 bytes
parent folder | download | duplicates (2)
// file kernel/n/ppc32/shift.S: shift of natural integers
/*-----------------------------------------------------------------------+
 |  Copyright 2005-2006, Michel Quercia (michel.quercia@prepas.org)      |
 |                                                                       |
 |  This file is part of Numerix. Numerix is free software; you can      |
 |  redistribute it and/or modify it under the terms of the GNU Lesser   |
 |  General Public License as published by the Free Software Foundation; |
 |  either version 2.1 of the License, or (at your option) any later     |
 |  version.                                                             |
 |                                                                       |
 |  The Numerix Library is distributed in the hope that it will be       |
 |  useful, but WITHOUT ANY WARRANTY; without even the implied warranty  |
 |  of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU  |
 |  Lesser General Public License for more details.                      |
 |                                                                       |
 |  You should have received a copy of the GNU Lesser General Public     |
 |  License along with the GNU MP Library; see the file COPYING. If not, |
 |  write to the Free Software Foundation, Inc., 59 Temple Place -       |
 |  Suite 330, Boston, MA 02111-1307, USA.                               |
 +-----------------------------------------------------------------------+
 |                                                                       |
 |                                 Shifts                                |
 |                                                                       |
 +-----------------------------------------------------------------------*/

                 ; +---------------------------------------+
                 ; |     Shift by decreasing addresses     |
                 ; +---------------------------------------+
        
; chiffre xn(shift_down)(chiffre *a, long la, chiffre *b, int k)
;
;  Shifts the natural integer a right by k bits and stores the result in b.
;  Words are processed from the highest address down to the lowest.
;
;  input:
;  a = natural integer of length la > 0
;  b = natural integer of length la, may coincide with a
;  k = integer such that 0 <= k < HW
;
;  output:
;  b <- a >> k
;  returns a mod 2^k
;
;  register usage, as read from the code below:
;  on entry r3 = a, r4 = la, r5 = b, r6 = k; the result is returned in r3
;  modified: r0, r3, r4, r5, r7, r8, r9, ctr, cr0
;  lr is saved in r0 and restored before returning (k != 0 path only)

#ifdef assembly_sn_shift_down
#define L(x) .Lsn_shift_down_##x
.globl _sn_shift_down
_sn_shift_down:

	; if k = 0, plain copy
	and.    r6,  r6,  r6	; sets cr0 from k, r6 itself is unchanged
	bne     L(non_nul)
	slwi    r7,  r4,  2	; r7 <- 4*la (length in bytes)
	add     r3,  r3,  r7	; a += la
	add     r5,  r5,  r7	; b += la
	mtctr   r4		; ctr <- la (one iteration per word)
1:
	lwzu    r7,  -4(r3)	; pre-decrement load: walk a from the top word down
	stwu    r7,  -4(r5)	; pre-decrement store into b
	bdnz    1b
	li      r3,   0		; return 0 (no bits lost)
	blr
	
L(non_nul):
        mflr    r0              ; r0  <- return address

	; set up the entry into the unrolled loop
	; with m = (-la) % 32, the first pass skips the first m bodies so that
	; the total number of words processed is exactly la
	neg     r7,  r4
        clrlslwi r7, r7,   27,2 ; r7 <- 4*((-la) % 32)
	bcl    20,31, L(here)   ; branch-and-link to the next instruction: lr <- address of the here label
L(here):
	mflr   r8
	; only the low 16 bits of the offset are added; the addis for the
	; high part is left commented out
/*	addis  r8,   r8, ha16(L(loop) - L(here)) */
	addi   r8,   r8, lo16(L(loop) - L(here))
	add    r8,   r8,   r7	; r8 <- loop address + 4*m
	slwi   r7,   r7,   2	; r7 <- 16*m
	add    r8,   r8,   r7	; r8 <- loop address + 20*m (each body is 5 instructions of 4 bytes)
	mtlr   r8		; lr <- entry address inside the loop
	addi   r7,   r4,   31	; ctr <- ceil(la/32)
	srwi   r7,   r7,   5
	mtctr  r7
	subi   r7,   r7,   1	; align a and b on the start of the last block
	slwi   r7,   r7,   7	; 128 bytes per block of 32 words
	add    r3,   r3,   r7
	add    r5,   r5,   r7

	li     r7,   32		; r7 <- 32 - k
	subf   r7,   r6,  r7
	li     r9,   0		; init carry
	blrl			; perform the shift: jump to the entry address held in lr

        ; loop body to unroll (5 instructions, enter at the 1st)
        ; for the word at byte offset x:
        ;   r4 <- word read from a
        ;   r8 <- (r4 >> k) + carry coming from the previously processed word
        ;   r9 <- r4 << (32 - k), the carry for the next word
        ;   r8 is stored into b at the same offset
        ; the entry address computation above relies on this body being
        ; exactly 5 instructions long
#define BODY(x) \
        lwz    r4,   x(r3)      @\
	srw    r8,   r4,   r6   @\
	add    r8,   r8,   r9   @\
	slw    r9,   r4,   r7   @\
        stw    r8,   x(r5)

	; loop unrolled for 32 words, highest offset first
L(loop):
        BODY(124)@ BODY(120)@ BODY(116)@ BODY(112)
        BODY(108)@ BODY(104)@ BODY(100)@ BODY(96)
        BODY(92)@  BODY(88)@  BODY(84)@  BODY(80)
        BODY(76)@  BODY(72)@  BODY(68)@  BODY(64)
        BODY(60)@  BODY(56)@  BODY(52)@  BODY(48)
        BODY(44)@  BODY(40)@  BODY(36)@  BODY(32)
        BODY(28)@  BODY(24)@  BODY(20)@  BODY(16)
        BODY(12)@  BODY(8)@   BODY(4)@   BODY(0)
#undef BODY

	subi   r3,  r3,   128	; move a and b down to the previous block of 32 words
	subi   r5,  r5,   128
	bdnz   L(loop)

	srw    r3,  r9,   r7	; r3 <- bits lost (carry out of the lowest word, moved back to the low k bits)
	mtlr   r0		; restore the return address
	blr

#undef L
#endif /* assembly_sn_shift_down */

                 ; +-------------------------------------+
                 ; |    Shift by increasing addresses    |
                 ; +-------------------------------------+
        
; chiffre xn(shift_up)(chiffre *a, long la, chiffre *b, int k)
;
;  Shifts the natural integer a left by k bits and stores the result in b.
;  Words are processed from the lowest address up to the highest.
;
;  input:
;  a = natural integer of length la > 0
;  b = natural integer of length la, may coincide with a
;  k = integer such that 0 <= k < HW
;
;  output:
;  b <- a << k
;  returns the k most significant bits of a
;
;  register usage, as read from the code below:
;  on entry r3 = a, r4 = la, r5 = b, r6 = k; the result is returned in r3
;  modified: r0, r3, r4, r5, r7, r8, r9, ctr, cr0
;  lr is saved in r0 and restored before returning (k != 0 path only)

#ifdef assembly_sn_shift_up
#define L(x) .Lsn_shift_up_##x
.globl _sn_shift_up
_sn_shift_up:

	; if k = 0, plain copy
	and.    r6,  r6,  r6	; sets cr0 from k, r6 itself is unchanged
	bne     L(non_nul)
	subi    r3,  r3,  4	; step a and b back one word
	subi    r5,  r5,  4	; (the lwzu/stwu below pre-increment by 4)
	mtctr   r4		; ctr <- la (one iteration per word)
1:
	lwzu    r7,  4(r3)
	stwu    r7,  4(r5)
	bdnz    1b
	li      r3,   0		; return 0 (no bits lost)
	blr

L(non_nul):	
        mflr    r0              ; r0  <- return address

	; set up the entry into the unrolled loop
	; with m = (-la) % 32, the first pass skips the first m bodies so that
	; the total number of words processed is exactly la
	neg     r7,  r4
        clrlslwi r7, r7,   27,2 ; r7 <- 4*((-la) % 32)
	subf    r3,   r7,   r3	; align a and b on a multiple of 32 words:
	subf    r5,   r7,   r5	; a -= 4*m and b -= 4*m, so the body at offset 4*m handles word 0
	bcl    20,31, L(here)   ; branch-and-link to the next instruction: lr <- address of the here label
L(here):
	mflr   r8
	; only the low 16 bits of the offset are added; the addis for the
	; high part is left commented out
/*	addis  r8,   r8, ha16(L(loop) - L(here)) */
	addi   r8,   r8, lo16(L(loop) - L(here))
	add    r8,   r8,   r7	; r8 <- loop address + 4*m
	slwi   r7,   r7,   2	; r7 <- 16*m
	add    r8,   r8,   r7	; r8 <- loop address + 20*m (each body is 5 instructions of 4 bytes)
	mtlr   r8		; lr <- entry address inside the loop
	addi   r7,   r4,   31	; ctr <- ceil(la/32)
	srwi   r7,   r7,   5
	mtctr  r7

	li     r7,   32		; r7 <- 32 - k
	subf   r7,   r6,  r7
	li     r9,   0		; init carry
	blrl			; perform the shift: jump to the entry address held in lr

        ; loop body to unroll (5 instructions, enter at the 1st)
        ; for the word at byte offset x:
        ;   r4 <- word read from a
        ;   r8 <- (r4 << k) + carry coming from the previously processed word
        ;   r9 <- r4 >> (32 - k), the carry for the next word
        ;   r8 is stored into b at the same offset
        ; the entry address computation above relies on this body being
        ; exactly 5 instructions long
#define BODY(x) \
        lwz    r4,   x(r3)      @\
	slw    r8,   r4,   r6   @\
	add    r8,   r8,   r9   @\
	srw    r9,   r4,   r7   @\
        stw    r8,   x(r5)

	; loop unrolled for 32 words, lowest offset first
L(loop):
        BODY(0)@   BODY(4)@   BODY(8)@   BODY(12)
        BODY(16)@  BODY(20)@  BODY(24)@  BODY(28)
        BODY(32)@  BODY(36)@  BODY(40)@  BODY(44)
        BODY(48)@  BODY(52)@  BODY(56)@  BODY(60)
        BODY(64)@  BODY(68)@  BODY(72)@  BODY(76)
        BODY(80)@  BODY(84)@  BODY(88)@  BODY(92)
        BODY(96)@  BODY(100)@ BODY(104)@ BODY(108)
        BODY(112)@ BODY(116)@ BODY(120)@ BODY(124)
#undef BODY

	addi   r3,  r3,   128	; move a and b up to the next block of 32 words
	addi   r5,  r5,   128
	bdnz   L(loop)

	mr     r3,  r9		; r3 <- bits lost (carry out of the highest word)
	mtlr   r0		; restore the return address
	blr

#undef L
#endif /* assembly_sn_shift_up */