File: exp.S

package info (click to toggle)
libffm 0.28-1
links: PTS
area: main
in suites: potato
size: 220 kB
ctags: 185
sloc: asm: 3,028; makefile: 95; ansic: 12; sh: 2
file content (376 lines) | stat: -rw-r--r-- 7,371 bytes
parent folder | download | duplicates (4)
/*
   libffm	- Free, pretty fast replacement for some math (libm) routines 
			on Linux/AXP, optimized for the 21164

   Copyright (C) 1998  Joachim Wesner <joachim.wesner@frankfurt.netsurf.de>
                  and  Kazushige Goto <goto@statabo.rim.or.jp>

   This library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Library General Public
   License as published by the Free Software Foundation; either
   version 2 of the License, or (at your option) any later version.

   This library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Library General Public License for more details.

   You should have received a copy of the GNU Library General Public
   License along with this library (see file COPYING.LIB); if not, write 
   to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, 
   MA 02139, USA.
*/
/*
   Fast 2^x, exp(x), 10^x approximations including range reduction 
   by Joachim Wesner, <joachim.wesner@frankfurt.netsurf.de>, 
   see also mc 8/1991 p. 78-93. Done in July 1998.
   
   This version uses a "longer" 11th order polynomial approximation, 
   but requires a somewhat simpler range reduction.
*/

/* improved and rescheduled by Kazushige Goto<goto@statabo.rim.or.jp> */

/*
   now including illegal argument handling by Kazushige Goto / Joachim Wesner
*/

	.set noat			# $28 (at) is used explicitly in the PROF prologues
	.set noreorder			# keep the hand-scheduled instruction order

#ifdef __ELF__
	.section .rodata
#else
	.rdata
#endif

/*
   R: read-only constant table shared by exp, exp2 and exp10.
   The code addresses every entry by a fixed byte offset from R, so
   the order and size of the entries below must not change.

   The .quad range limits are IEEE double bit patterns.  They are
   loaded with ldq and compared as integers against the bit pattern
   of the argument.
*/
	.align 5
R:
	.quad 0x40862E42FEFA39EE	# R+0   exp: upper limit for x (about  709.78)
	.quad 0xC086232BDD7ABCD1	# R+8   exp: lower limit for x (about -708.40)
	.quad 0x408FFFFFFFFFFFFF	# R+16  exp2: upper limit for x (just below 1024)
	.quad 0xc08FEFFFFFFFFFFF	# R+24  exp2: lower limit for x (just above -1022)

	.t_floating 1.00000000000000000000e0		# R+32  1.0 (iexp adjustment for x < 0, and the $SubNormal result)
	.t_floating 1.442695040888963407e0		# R+40  LDE = log2(e)
	.t_floating 6.93359375e-1			# R+48  K1, K1 + K2 approximates ln(2)
	.t_floating -2.1219444005469060e-4		# R+56  K2

	.quad 0x40734413509F79FE	# R+64  exp10: upper limit for x (about  308.25)
	.quad 0xC0733A7146f72A41	# R+72  exp10: lower limit for x (about -307.65)
	.quad 0x7e37e43c8800759c		# R+80  1.0e300(dummy), not read by the code in this file
	.quad 0				# R+88  padding, not read by the code in this file

	.t_floating  1.00000000000000000000e0		# R+96  1.0 (loaded by exp10)
	.t_floating  3.321928094887362347		# R+104 LD10 = log2(10)
	.t_floating  3.0103000998497010e-001		# R+112 J1, J1 + J2 approximates log10(2)
	.t_floating -1.4320988897547790e-008		# R+120 J2

/*
	2^x (0..1)
	Derived from Chebyshev Approx.
	rel. error 4e-18

	Coefficients c0 .. c11 of p(x) = c0 + c1*x + ... + c11*x^11.
	The first six entries are the odd coefficients in descending
	order, the last six the even ones, matching the way
	$exp_continue pairs them:
	    (c11*x + c10)*x^10 + (c9*x + c8)*x^8 + ... + (c1*x + c0)
*/
	.t_floating 6.29768236459390291646e-10		# R+128 c11
	.t_floating 1.02577755722569533985e-7		# R+136 c9
	.t_floating 1.52532278639779265219e-5		# R+144 c7
	.t_floating 1.33335587324213489593e-3		# R+152 c5

	.t_floating 5.55041086659065138753e-2		# R+160 c3
	.t_floating 6.93147180559946507472e-1		# R+168 c1
	.t_floating 6.54053877719848751881e-9		# R+176 c10
	.t_floating 1.32077456410841510838e-6		# R+184 c8

	.t_floating 1.54035094120123436379e-4		# R+192 c6
	.t_floating 9.61812909725173574571e-3		# R+200 c4
	.t_floating 2.40226506959042129807e-1		# R+208 c2
	.t_floating 9.99999999999999996e-01		# R+216 c0

.text
	.align 5
/*
   exp2: computes 2^x for a double argument.

   In:     $f16 = x
   Out:    $f0  = result (produced by the shared code inside exp)
   Writes: $0-$5, $f1, $f10, $f16, $f19-$f30 in this procedure
   Stack:  16 bytes; 0($30) receives the bit pattern of x

   Only the range check and the integer/fraction split are done here.
   Every path then leaves this procedure for a label defined in exp:
     $exception     x is outside the limits at R+16 / R+24 (this also
                    catches Inf and NaN bit patterns)
     $SubNormal     the exponent field of x is 0 (zero or denormal)
     $exp_continue  normal case, entered with
                      $f16       = x - iexp
                      $f10       = iexp as a quadword integer
                      $f19-$f30  = the twelve coefficients R+128 .. R+216
   The 16-byte frame allocated here is released by that shared code.
*/
	.globl exp2
	.ent exp2
exp2:
	lda	$30,   -16($30)		# reserve 16 bytes of stack scratch
	ldgp	$29,.-exp2($27)		# load gp; .-exp2 is the offset of this insn from the entry
	stt	$f16,    0($30)		# spill x so that its bits can be read as an integer
	.frame	$30,16,$26,0

#ifdef PROF
	lda	$28, _mcount		# profiling build: call _mcount
	jsr	$28, ($28), _mcount
	unop
	unop
#endif
	.prologue 1

	lda	$1,    R		# $1 = base of the constant table
	ldq	$3,   16($1)		# $3 = upper limit for x, as a bit pattern
	ldq	$2,    0($30)		# $2 = bit pattern of x
	ldq	$4,   24($1)		# $4 = lower limit for x, as a bit pattern

	sll	$2, 1, $0		# shift the sign bit out
	blt	$2, $Negative2		# sign bit of x is set
	cmplt	$3,  $2, $5		# $5 = 1 if bits(x) exceed the upper limit pattern
	bne	$5, $exception		# Value is too Large

	srl	$0,  53, $0		# $0 = exponent field of x
	br	$Calc2

$Negative2:
	cmplt	$4,  $2, $5		# $5 = 1 if bits(x) exceed the lower limit pattern (larger magnitude)
	srl	$0,  53, $0		# $0 = exponent field of x

	bne	$5, $exception		# Value is too Small
$Calc2:
	ldt	$f19, 128($1)		# c11 (loaded again below)
	unop
	beq	$0, $SubNormal		# exponent field 0: x is zero or denormal, result is 1.0
	.align 4


	cvttq/c	$f16, $f10	# double -> int, chopped toward zero
	ldt	$f1,   32($1)		# 1.0
	ldt	$f19, 128($1)		# c11
	ldt	$f20, 136($1)		# c9

	ldt	$f21, 144($1)		# c7
	ldt	$f22, 152($1)		# c5
	ldt	$f23, 160($1)		# c3
	ldt	$f24, 168($1)		# c1

	cvtqt	$f10, $f10		# int -> double
	ldt	$f25, 176($1)		# c10
	ldt	$f26, 184($1)		# c8
	ldt	$f27, 192($1)		# c6
	ldt	$f28, 200($1)		# c4

	fbge	$f16,$L2		# x >= 0: chopped value is already the integer part wanted
	subt	$f10, $f1,  $f10	# x < 0: iexp -= 1.0 so that x - iexp is positive
	.align 4

$L2:
	subt	$f16, $f10, $f16	# x -= iexp
	ldt	$f29, 208($1)		# c2
	cvttqc	$f10,$f10		# double -> int 
	ldt	$f30, 216($1)		# c0

	br	$31, $exp_continue	# continue in the shared polynomial code inside exp
	.end exp2

	.align 5
/*
   exp10: computes 10^x for a double argument.

   In:     $f16 = x
   Out:    $f0  = result (produced by the shared code inside exp)
   Stack:  16 bytes; 0($30) receives the bit pattern of x

   This procedure only loads the base-10 limits and constants and then
   branches to $continue inside exp, which does the range check, the
   argument reduction and the polynomial.  Registers handed over:
     $1   = address of R          $2   = bit pattern of x
     $3   = upper limit (R+64)    $4   = lower limit (R+72)
     $f1  = 1.0 (R+96)            $f0  = LD10 (R+104)
     $f29 = J1 (R+112)            $f30 = J2 (R+120)
   The 16-byte frame allocated here is released by that shared code.
*/
	.globl exp10
	.ent exp10
exp10:
	lda	$30,   -16($30)		# reserve 16 bytes of stack scratch
	ldgp	$29,.-exp10($27)	# load gp; .-exp10 is the offset of this insn from the entry
	stt	$f16,    0($30)		# spill x so that its bits can be read as an integer
	.frame	$30,16,$26,0

#ifdef PROF
	lda	$28, _mcount		# profiling build: call _mcount
	jsr	$28, ($28), _mcount
	unop
	unop
#endif
	.prologue 1

	lda	$1,    R		# $1 = base of the constant table
	ldq	$3,   64($1)		# $3 = upper limit for x, as a bit pattern
	ldq	$4,   72($1)		# $4 = lower limit for x, as a bit pattern
	ldt	$f1,  96($1)		# 1.0

	ldq	$2,    0($30)		# $2 = bit pattern of x
	ldt	$f0, 104($1)		# LD10
	ldt	$f29,112($1)		# J1
	ldt	$f30,120($1)		# J2

	br	$31, $continue		# shared range check and evaluation inside exp
	.end exp10

	.align 5
/*
   exp: computes e^x for a double argument.  The code from $continue
   onward is shared with the other entry points in this file:
   exp10 enters at $continue; exp2 enters at $exp_continue,
   $SubNormal and $exception.

   In:     $f16 = x
   Out:    $f0  = result
   Stack:  16 bytes.  0($30) first holds the bit pattern of x and
           later the integer exponent; 8($30) holds the bit pattern
           of 2^iexp.  Every exit path releases the frame.

   Method (M = $f0, H = $f29, L = $f30 as loaded by the entry point)
     iexp   = chop(x * M), minus 1.0 when x < 0
     r      = x - iexp*H - iexp*L        (skipped when iexp == 0)
     result = p(r * M) * 2^iexp
   p is the polynomial for 2^t whose coefficients sit at R+128..R+216.

   Special cases
     exponent field of x is 0 (zero, denormal)  ->  1.0
     NaN                                        ->  NaN with all bits set
     +Inf, or finite x above the upper limit    ->  +Inf
     -Inf, or finite x below the lower limit    ->  0.0

   Register contract at $continue
     $1 = address of R, $2 = bit pattern of x,
     $3 / $4 = upper / lower limit bit patterns,
     $f16 = x, $f0 = M, $f1 = 1.0, $f29 = H, $f30 = L

   Register contract at $exp_continue
     $f16 = reduced argument t, $f10 = iexp as a quadword integer,
     $f19..$f24 = c11, c9, c7, c5, c3, c1
     $f25..$f30 = c10, c8, c6, c4, c2, c0

   Register contract at $exception
     $2 = bit pattern of x.  No other register is consulted.
*/
	.globl exp
	.ent exp
exp:
	lda	$30,  -16($30)		# reserve 16 bytes of stack scratch
	ldgp	$29,.-exp($27)		# load gp; .-exp is the offset of this insn from the entry
	stt	$f16,   0($30)		# spill x so that its bits can be read as an integer

	.frame	$30,16,$26,0

#ifdef PROF
	lda	$28, _mcount		# profiling build: call _mcount
	jsr	$28, ($28), _mcount
	unop
	unop
#endif
	.prologue 1

	lda	$1,    R		# $1 = base of the constant table
	ldq	$3,    0($1)		# $3 = upper limit for x, as a bit pattern
	ldq	$4,    8($1)		# $4 = lower limit for x, as a bit pattern
	ldt	$f1,  32($1)		# 1.0

	ldq	$2,    0($30)		# $2 = bit pattern of x
	ldt	$f0,  40($1)		# LDE
	ldt	$f29, 48($1)		# K1
	ldt	$f30, 56($1)		# K2
	.align 4

$continue:
	sll	$2, 1, $0		# shift the sign bit out
	blt	$2, $Negative		# sign bit of x is set
	cmplt	$3,  $2, $5		# $5 = 1 if bits(x) exceed the upper limit pattern
	bne	$5, $exception		# Value is too Large

	srl	$0,  53, $6		# $6 = exponent field of x
	br	$Calc

$Negative:
	cmplt	$4,  $2, $7		# $7 = 1 if bits(x) exceed the lower limit pattern (larger magnitude)
	srl	$0,  53, $6		# $6 = exponent field of x

	bne	$7, $exception		# Value is too Small
$Calc:
	ldt	$f19, 128($1)		# c11
	unop
	beq	$6, $SubNormal		# exponent field 0: x is zero or denormal, result is 1.0
	.align 4

	mult	$f16, $f0, $f10		# iexp = x*M
	ldt	$f20, 136($1)		# c9

	ldt	$f21, 144($1)		# c7
	ldt	$f22, 152($1)		# c5
	ldt	$f23, 160($1)		# c3
	ldt	$f24, 168($1)		# c1

	ldt	$f25, 176($1)		# c10
	ldt	$f26, 184($1)		# c8
	cvttq/c	$f10, $f11		# double -> int, chopped toward zero
	ldt	$f27, 192($1)		# c6
	ldt	$f28, 200($1)		# c4

	cvtqt	$f11, $f10		# int -> double

	fbge	$f16, $L4		# if x>=0 goto $L4
	subt	$f10, $f1,  $f10	# x < 0: iexp -= 1.0 so that the reduced argument is positive
$L4:
	fbeq	$f10, $L5		# iexp == 0: nothing to subtract

	mult	$f10, $f29, $f29	# iexp*H
	mult	$f10, $f30, $f30	# iexp*L
	subt	$f16, $f29, $f17	# x - iexp*H
	subt	$f17, $f30, $f16	# r = x - iexp*H - iexp*L
	.align 4

$L5:
	mult	$f16, $f0, $f16		# t = r*M, the argument for the 2^t polynomial
	ldt	$f29, 208($1)		# c2 (H is no longer needed)
	ldt	$f30, 216($1)		# c0 (L is no longer needed)
	cvttqc	$f10, $f10		# iexp: double -> int
	.align 4

$exp_continue:
	/* p(x) = (c11*x + c10)*x^10 + (c9*x + c8)*x^8 + (c7*x + c6)*x^6
	        + (c5*x + c4)*x^4 + (c3*x + c2)*x^2 + (c1*x + c0)       */
	mult	$f16, $f16, $f11	# x2 = x * x
	mult	$f19, $f16, $f19	# y1 = c11 * x
	mult	$f20, $f16, $f20	# y2 = c9 * x
	mult	$f21, $f16, $f21	# y3 = c7 * x

	mult	$f11, $f11, $f12	# x4 = x2 * x2
	mult	$f22, $f16, $f22	# y4 = c5 * x
	stt	$f10, 0($30)		# spill integer iexp for the integer unit
	addt	$f19, $f25, $f19	# y1 += c10

	mult	$f23, $f16, $f23	# y5 = c3 * x
	addt	$f20, $f26, $f20	# y2 += c8
	mult	$f24, $f16, $f24	# y6 = c1 * x
	addt	$f21, $f27, $f21	# y3 += c6

	mult	$f12, $f11, $f13	# x6 = x4 * x2
	mult	$f12, $f12, $f14	# x8 = x4 * x4
	addt	$f22, $f28, $f22	# y4 += c4
	addt	$f23, $f29, $f23	# y5 += c2

	ldq	$1,   0($30)		# $1 = iexp (the table base is no longer needed)
	addt	$f24, $f30, $f24	# y6 += c0
	mult	$f13, $f12, $f15	# x10 = x6 * x4
	mult	$f20, $f14, $f20	# y2 *= x8

	mult	$f21, $f13, $f21	# y3 *= x6
	lda	$1,1023($1)		# add the exponent bias of an IEEE double
	mult	$f22, $f12, $f22	# y4 *= x4
	sll	$1, 52, $1		# move into the exponent field: bit pattern of 2^iexp

	mult	$f19, $f15, $f17	# y1 *= x10
	stq	$1,   8($30)		# hand 2^iexp over to the FP unit through memory
	mult	$f23, $f11, $f23	# y5 *= x2
	addt	$f17, $f20, $f19	# y1 += y2

	addt	$f19, $f21, $f17	# y1 += y3
	addt	$f17, $f22, $f19	# y1 += y4
	ldt	$f25, 8($30)		# $f25 = 2^iexp
	addt	$f19, $f23, $f17	# y1 += y5

	addt	$f17, $f24, $f19	# y1 += y6
	addq	$30, 16, $30		# release the frame
	mult	$f19, $f25, $f0		# result = p * 2^iexp
	ret	$31
	.align 4

$SubNormal:
	ldt	$f0, 32($1)		# return 1.0
	addq	$30, 16, $30		# release the frame
	ret
	.align 4

$exception:		/* exp(INF) = INF, exp(NaN) = NaN, exp(-INF) = 0 */
	sll	$2,  1, $0		# shift the sign bit out

	lda	$3, 0x7ff		# exponent field shared by Inf and NaN
	srl	$0,  53, $0		# $0 = exponent field of x

	cmpeq	$0, $3, $4
	bne	$4, $Inf_or_NaN

	/* Finite but out of range.  The sign of x alone decides between
	   +Inf and 0, so no flag register left over from the range check
	   is consulted here: the negative check at $Negative writes $7
	   and never writes $5.  */
	br	$TooLarge
	.align 4

$Inf_or_NaN:
	sll	$2, 12, $0		# keep only the fraction bits
	bne	$0, $NaN		# non-zero fraction: NaN, otherwise +/-Inf
	.align 4

$Infinity:				/* x is +Inf or -Inf */
$TooLarge:				/* x is finite and out of range */
	blt	$2, $Zero		# negative argument: the result is 0
	sll	$3, 52, $0		# 0x7ff in the exponent field: bit pattern of +Inf
	stq	$0,  0($30)
	ldt	$f0, 0($30)		# return +Inf
	addq	$30, 16, $30		# release the frame
	ret
	.align 4

$NaN:
	lda	$0, -1			# all bits set is a NaN pattern
	stq	$0,  0($30)
	ldt	$f0, 0($30)		# return NaN
	addq	$30, 16, $30		# release the frame
	ret
	.align 4

$Zero:
	fclr	$f0			# return 0.0
	addq	$30, 16, $30		# release the frame
	ret
	.end exp