File: exp.S

package info (click to toggle)
libffm 0.21-2
links: PTS
area: main
in suites: slink
size: 172 kB
ctags: 94
sloc: asm: 1,176; makefile: 76; ansic: 10; sh: 2
file content (249 lines) | stat: -rw-r--r-- 5,570 bytes
/*
   libffm	- Free, pretty fast replacement for some math (libm) routines 
			on Linux/AXP, optimized for the 21164

   Copyright (C) 1998  Joachim Wesner <joachim.wesner@frankfurt.netsurf.de>
                  and  Kazushige Goto <goto@statabo.rim.or.jp>

   This library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Library General Public
   License as published by the Free Software Foundation; either
   version 2 of the License, or (at your option) any later version.

   This library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Library General Public License for more details.

   You should have received a copy of the GNU Library General Public
   License along with this library (see file COPYING.LIB); if not, write 
   to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, 
   MA 02139, USA.
*/
/*
   Fast 2^x, exp(x), 10^x approximations including range reduction 
   by Joachim Wesner, <joachim.wesner@frankfurt.netsurf.de>, 
   see also mc 8/1991 p. 78-93. Done in July 1998.
   
   This version uses a "longer" 11th order polynomial approximation, 
   but requires a somewhat simpler range reduction.

   No special handling of illegal arguments or NaNs yet!
*/

/* improved and rescheduled by Kazushige Goto<goto@statabo.rim.or.jp> */

	.set noat
	.set noreorder
/*
	noat / noreorder: the assembler is told not to use the $at register
	on its own and not to reorder the instructions written below.
*/

#ifdef __ELF__
	.section .rodata
#else
	.rdata
#endif

/*
	R: read-only constant table shared by exp, exp2 and exp10.
	Every entry is one 8-byte T_floating value and the code addresses
	the table by byte offset from R:

	   0.. 31   constants loaded by exp
	  32.. 63   constants loaded by exp10 (exp2 loads only the 1.0 at 32)
	  64..159   polynomial coefficients for 2^x
*/
	.align 5
R:
	.t_floating 1.00000000000000000000e0		# offset 0: 1.0, subtracted from iexp when x < 0
	.t_floating 1.442695040888963407e0		# offset 8: LDE = log2(e)
	.t_floating 6.93359375e-1			# offset 16: K1, leading part of ln(2)
	.t_floating -2.1219444005469060e-4		# offset 24: K2, correction so that K1 + K2 = ln(2)

	.t_floating  1.00000000000000000000e0		# offset 32: 1.0, same role as offset 0
	.t_floating  3.321928094887362347		# offset 40: LD10 = log2(10)
	.t_floating  3.0103000998497010e-001		# offset 48: J1, leading part of log10(2)
	.t_floating -1.4320988897547790e-008		# offset 56: J2, correction so that J1 + J2 = log10(2)

/*
	2^x (0..1)
	Derived from Chebyshev Approx.
	rel. error 4e-18

	Coefficients c0..c11 of  2^x ~ c0 + c1*x + ... + c11*x^11.
	The odd-order coefficients are stored first (offsets 64..104) and
	the even-order ones second (offsets 112..152); the evaluation code
	pairs entries that lie 48 bytes apart as (c[2k+1]*x + c[2k]).
*/

	.t_floating 6.29768236459390291646e-10		# offset 64: c11
	.t_floating 1.02577755722569533985e-7		# offset 72: c9
	.t_floating 1.52532278639779265219e-5		# offset 80: c7
	.t_floating 1.33335587324213489593e-3		# offset 88: c5

	.t_floating 5.55041086659065138753e-2		# offset 96: c3
	.t_floating 6.93147180559946507472e-1		# offset 104: c1
	.t_floating 6.54053877719848751881e-9		# offset 112: c10
	.t_floating 1.32077456410841510838e-6		# offset 120: c8

	.t_floating 1.54035094120123436379e-4		# offset 128: c6
	.t_floating 9.61812909725173574571e-3		# offset 136: c4
	.t_floating 2.40226506959042129807e-1		# offset 144: c2
	.t_floating 9.99999999999999996e-01		# offset 152: c0

.text
/*
	exp2: front end for 2^x.

	In:    $f16 = x
	Out:   $f0  = result; it is produced by the shared tail at
	       $exp_continue (inside exp), which also releases the
	       16-byte stack frame allocated here and returns via $26.
	Uses:  $1, $f1, $f10, $f16, $f19-$f30 here; the tail uses more.

	Work done here:
	  iexp = integer part of x (chopped conversion), minus 1.0 if x < 0
	  x    = x - iexp          (so the remainder is never negative)
	  $f19..$f30 = the twelve polynomial coefficients from R+64..R+152
	  $f10 = iexp converted back to a 64-bit integer

	No range or NaN checks are made.
*/
	.align 5
	.globl exp2
	.ent exp2
exp2:
	cvttqc	$f16, $f10		# iexp = x chopped to a 64-bit integer
	ldgp	$29,.-exp2($27)
	lda	$30,   -16($30)
	.frame	$30,16,$26,0

/*	Optional profiling hook: with PROF defined, _mcount is called
	through $28.
	NOTE(review): the two unops look like padding that keeps the
	following instruction groups aligned - confirm.  */
#ifdef PROF
	lda	$28, _mcount
	jsr	$28, ($28), _mcount
	unop
	unop
#endif
	.prologue 1

	lda	$1,  R
	ldt	$f1,  32($1)		# 1.0
	ldt	$f19, 64($1)		# c11
	cvtqt	$f10, $f10		# int -> double

	ldt	$f23, 96($1)		# c3
	ldt	$f27,128($1)		# c6
	ldt	$f20, 72($1)		# c9
	ldt	$f21, 80($1)		# c7

	ldt	$f22, 88($1)		# c5
	ldt	$f24,104($1)		# c1
	fbge	$f16,$L2		# if x>=0 keep iexp as is
	subt	$f10, $f1,  $f10	# x < 0: iexp -= 1.0 ($f1, from R+32)

$L2:
	ldt	$f25,112($1)		# c10
	ldt	$f26,120($1)		# c8
	subt	$f16, $f10, $f16	# x -= iexp
	ldt	$f28,136($1)		# c4

	ldt	$f29,144($1)		# c2
	ldt	$f30,152($1)		# c0
	cvttqc	$f10,$f10		# iexp: double -> 64-bit integer for the tail
	br	$31, $exp_continue	# join the polynomial evaluation in exp
	.end exp2

	.align 5
/*
	exp10: front end for 10^x.

	In:    $f16 = x
	Out:   $f0  = result, produced by the code at $continue inside exp,
	       which also releases the 16-byte stack frame allocated here.

	Only selects the base-10 constants from table R and then joins the
	range reduction shared with exp:
	  $f1  = 1.0                     (R+32)
	  $f0  = LD10 = log2(10)         (R+40)
	  $f29 = J1, $f30 = J2           (R+48, R+56; J1 + J2 = log10(2))
	  $1   = address of R
*/
	.globl exp10
	.ent exp10
exp10:
	lda	$30,   -16($30)
	ldgp	$29,.-exp10($27)
	lda	$1,    R
	.frame	$30,16,$26,0

/*	Optional profiling hook: with PROF defined, _mcount is called
	through $28.  */
#ifdef PROF
	lda	$28, _mcount
	jsr	$28, ($28), _mcount
	unop
	unop
#endif
	.prologue 1

	ldt	$f1,  32($1)		# 1.0
	ldt	$f0,  40($1)		# LD10
	ldt	$f29, 48($1)		# J1
	ldt	$f30, 56($1)		# J2

	br	$31, $continue		# shared range reduction in exp
	.end exp10

	.align 5
/*
	exp: e^x. This routine also contains the two tails that the other
	entry points branch into: $continue (from exp10) and $exp_continue
	(from exp2).

	In:    $f16 = x
	Out:   $f0  = result
	Uses:  $1, $f0, $f1, $f10-$f16, $f19-$f30 and a 16-byte stack frame
	       (0($30) and 8($30) are scratch slots for moving values
	       between the floating-point and integer register files).

	No range or NaN checks are made.

	Below, cK names the coefficient of x^K in the polynomial for 2^x.
	Byte offsets from R:  c11=64 c9=72 c7=80 c5=88 c3=96 c1=104
	                      c10=112 c8=120 c6=128 c4=136 c2=144 c0=152
*/
	.globl exp
	.ent exp
exp:
	lda	$30,   -16($30)
	ldgp	$29,.-exp($27)
	lda	$1,    R
	.frame	$30,16,$26,0

/*	Optional profiling hook: with PROF defined, _mcount is called
	through $28.  */
#ifdef PROF
	lda	$28, _mcount
	jsr	$28, ($28), _mcount
	unop
	unop
#endif
	.prologue 1

	ldt	$f1,   0($1)		# 1.0
	ldt	$f0,   8($1)		# LDE
	ldt	$f29, 16($1)		# K1
	ldt	$f30, 24($1)		# K2

/*
	$continue: range reduction shared by exp and exp10.
	Expects: $f16 = x, $f0 = LDE (LD10 for exp10), $f1 = 1.0,
	         $f29/$f30 = K1/K2 (J1/J2 for exp10), $1 = address of R,
	         16-byte frame already allocated.
	Computes iexp = chopped (x * $f0), minus 1.0 if x < 0, then
	         x = (x - iexp*$f29 - iexp*$f30) * $f0
	with the coefficient loads interleaved.
*/
$continue:
	mult	$f16, $f0, $f10		# iexp = x*LDE (x*LD10 for exp10)
	ldt	$f19, 64($1)		# c11
	ldt	$f20, 72($1)		# c9
	ldt	$f21, 80($1)		# c7

	ldt	$f22, 88($1)		# c5
	ldt	$f23, 96($1)		# c3
	ldt	$f24,104($1)		# c1
	ldt	$f25,112($1)		# c10

	cvttqc	$f10, $f10		# double -> int
	ldt	$f26,120($1)		# c8
	ldt	$f27,128($1)		# c6
	ldt	$f28,136($1)		# c4

	cvtqt	$f10, $f10		# int -> double
	fbge	$f16, $L4		# if x>=0 goto $L4
	subt	$f10, $f1,  $f10	# x < 0: iexp -= 1.0 ($f1)
$L4:
	fbeq	$f10, $L5		# iexp == 0: nothing to subtract

	mult	$f10, $f29, $f29	# iexp*K1
	mult	$f10, $f30, $f30	# iexp*K2
	subt	$f16, $f29, $f16	# x - iexp*K1
	subt	$f16, $f30, $f16	# x - iexp*K1 - iexp*K2

$L5:
	mult	$f16, $f0, $f16		# x *= LDE (LD10 for exp10)
	ldt	$f29,144($1)		# c2 (K1 slot no longer needed)
	ldt	$f30,152($1)		# c0 (K2 slot no longer needed)
	cvttqc	$f10,$f10		# iexp: double -> 64-bit integer

/*
	$exp_continue: polynomial evaluation and scaling, shared by all
	three entry points.
	Expects: $f16 = reduced x, $f10 = iexp as a 64-bit integer held in
	         a floating-point register, $f19..$f30 = coefficients in
	         the order listed in the header, frame allocated.
	Forms six partial sums y1..y6 = (c[odd]*x + c[even]) * x^(even),
	adds them, and multiplies by 2^iexp. The power of two is built in
	the integer unit as (iexp + 1023) << 52, i.e. the biased exponent
	placed in the exponent field of a T_floating value; iexp is not
	range-checked.
*/
$exp_continue:
	mult	$f16, $f16, $f11	# x2 = x * x
	mult	$f19, $f16, $f19	# y1 = c11 * x
	mult	$f20, $f16, $f20	# y2 = c9 * x
	mult	$f21, $f16, $f21	# y3 = c7 * x

	mult	$f11, $f11, $f12	# x4 = x2 * x2
	stt	$f10, 0($30)		# spill integer iexp for the integer unit
	mult	$f22, $f16, $f22	# y4 = c5 * x
	addt	$f19, $f25, $f19	# y1 += c10

	mult	$f23, $f16, $f23	# y5 = c3 * x
	addt	$f20, $f26, $f20	# y2 += c8
	mult	$f24, $f16, $f24	# y6 = c1 * x
	addt	$f21, $f27, $f21	# y3 += c6

	mult	$f12, $f11, $f13	# x6 = x4 * x2
	mult	$f12, $f12, $f14	# x8 = x4 * x4
	addt	$f22, $f28, $f22	# y4 += c4
	addt	$f23, $f29, $f23	# y5 += c2

	ldq	$1,   0($30)		# $1 = iexp (table pointer no longer needed)
	addt	$f24, $f30, $f24	# y6 += c0
	mult	$f13, $f12, $f15	# x10 = x6 * x4
	mult	$f20, $f14, $f20	# y2 *= x8

	mult	$f21, $f13, $f21	# y3 *= x6
	lda	$1,1023($1)		# add the exponent bias
	mult	$f22, $f12, $f22	# y4 *= x4
	sll	$1, 52, $1		# move it into the exponent field

	mult	$f19, $f15, $f19	# y1 *= x10
	stq	$1,   8($30)		# spill the bit pattern of 2^iexp
	mult	$f23, $f11, $f23	# y5 *= x2
	addt	$f19, $f20, $f19	# y1 += y2

	addt	$f19, $f21, $f19	# y1 += y3
	addt	$f19, $f22, $f19	# y1 += y4
	ldt	$f25, 8($30)		# $f25 = 2^iexp
	addt	$f19, $f23, $f19	# y1 += y5

	addq	$30, 16, $30		# release the frame
	addt	$f19, $f24, $f19	# y1 += y6
	mult	$f19, $f25, $f0		# result = y1 * 2^iexp
	ret	$31,($26),1

	.end exp